//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32(
    "disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

static cl::opt<bool>
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
                          cl::desc("disable vector permute decomposition"),
                          cl::init(true), cl::Hidden);
131 "disable-auto-paired-vec-st",
132 cl::desc("disable automatically generated 32byte paired vector stores"),
133 cl::init(true), cl::Hidden);
134
136 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
137 cl::desc("Set minimum number of entries to use a jump table on PPC"));

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";

// A faster local-exec TLS access sequence (enabled with the
// -maix-small-local-exec-tls option) can be produced for TLS variables;
// consistent with the IBM XL compiler, we apply a max size of slightly under
// 32KB.

// FIXME: Remove this once the bug has been fixed!
static cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
    cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Initialize the map that relates the PPC addressing modes to the computed
  // flags of a load/store instruction. The map is used to determine the
  // optimal addressing mode when selecting loads and stores.
  initializeAddrModeMap();
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // The EFPU2 APU only supports f32.
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to the customized fast code sequence in the .td file.

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.

  // Custom lower inline assembly to check for special registers.

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
  }

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  if (!Subtarget.hasSPE()) {
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
  }

  if (Subtarget.useCRBits()) {

    if (isPPC64 || Subtarget.hasFPCVT()) {
                         isPPC64 ? MVT::i64 : MVT::i32);
                         isPPC64 ? MVT::i64 : MVT::i32);

                         isPPC64 ? MVT::i64 : MVT::i32);
                         isPPC64 ? MVT::i64 : MVT::i32);

                         isPPC64 ? MVT::i64 : MVT::i32);
                         isPPC64 ? MVT::i64 : MVT::i32);

                         isPPC64 ? MVT::i64 : MVT::i32);
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
    }

    // PowerPC does not support direct load/store of condition registers.

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)

    for (MVT VT : MVT::integer_valuetypes()) {
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the results of both the remainder and the division are required, it
  // is more efficient to compute the remainder from the result of the
  // division rather than to use the remainder instruction. The instructions
  // are legalized directly because the DivRemPairsPass performs the
  // transformation at the IR level.
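  // A minimal sketch of that IR-level rewrite (illustrative only, not the
  // pass itself): given
  //   q = a / b;
  //   r = a % b;
  // DivRemPairsPass can decompose the remainder as
  //   q = a / b;
  //   r = a - q * b;
  // so only the division needs to be lowered here.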
  if (Subtarget.isISA3_0()) {
  } else {
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.

  // Handle constrained floating-point operations of scalar.
  // TODO: Handle SPE-specific operations.

  if (!Subtarget.hasSPE()) {
  }

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasFSQRT()) {
  }

  if (Subtarget.hasFPRND()) {
  }

  // We don't support sin/cos/sqrt/fmod/pow.

  // MASS transformation for LLVM intrinsics with replicating fast-math flags,
  // kept consistent with the PPCGenScalarMASSEntries pass.
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
  }

  if (Subtarget.hasSPE()) {
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  if (Subtarget.hasSPE())
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))

  if (Subtarget.hasFCPSGN()) {
  } else {
  }

  if (Subtarget.hasFPRND()) {
  }

  // Prior to P10, PowerPC does not have BSWAP, but we can use the vector
  // BSWAP instruction xxbrd to speed up scalar BSWAP64.
  if (Subtarget.isISA3_1()) {
  } else {
    setOperationAction(
        ISD::BSWAP, MVT::i64,
        (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
  }

  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
  } else {
  }

  // PowerPC does not have ROTR.

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())

  if (Subtarget.hasFPU()) {
  }

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.

    // SPE supports signaling compares of f32/f64.
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.

    // PowerPC does not have [U|S]INT_TO_FP.
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    if (TM.Options.UnsafeFPMath) {
    }
  } else {
  }

  // We cannot sextinreg(i1). Expand to shifts.

  // NOTE: The EH_SJLJ_SETJMP/_LONGJMP support here is NOT intended for SjLj
  // exception handling, but rather as a light-weight setjmp/longjmp
  // replacement to support continuations, user-level threading, and the like.
  // As a result, no other SjLj exception interfaces are implemented, so
  // please don't build your own exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
  } else

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
  else

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // We want to custom lower some of our intrinsics.

  // To handle counter-based loop conditions.

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
  }

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
    } else {
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
    }

  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    // 64-bit PowerPC wants to expand i128 shifts itself.
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
  }

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasAltivec()) {
    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.

      // For v2i64, these are only valid with P8Vector. This is corrected
      // after the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
      } else {
      }

      if (Subtarget.hasVSX()) {
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
      } else {
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
      else

      // We promote all shuffles to v16i8.
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);

    // Custom-lower ROTL of v1i128 to a VECTOR_SHUFFLE of v16i8.
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
        setOperationAction(ISD::ROTL, VT, Legal);
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    if (Subtarget.isISA3_1()) {
      setOperationAction(ISD::MUL, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
      setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v2i64, Legal);
      setOperationAction(ISD::SREM, MVT::v2i64, Legal);
      setOperationAction(ISD::UREM, MVT::v4i32, Legal);
      setOperationAction(ISD::SREM, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
      setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);


    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      if (Subtarget.hasP8Vector()) {
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
      }

      // The nearbyint variants are not allowed to raise the inexact
      // exception, so we can only code-gen them with unsafe math.
      if (TM.Options.UnsafeFPMath) {
      }

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128-bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct-move costs, it's not
        // worth doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      if (Subtarget.isISA3_1())
        setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
      else
        setOperationAction(ISD::SETCC, MVT::v1i128, Expand);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v2f64);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      // Handle constrained floating-point operations of vector.
      // The predicate is `hasVSX` because Altivec instructions have no
      // exception behavior but VSX vector instructions do.

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      for (MVT FPT : MVT::fp_valuetypes())
        setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

      // Expand the SELECT to SELECT_CC.

      setTruncStoreAction(MVT::f128, MVT::f64, Expand);
      setTruncStoreAction(MVT::f128, MVT::f32, Expand);

      // No implementation for these ops on PowerPC.
      setOperationAction(ISD::FSIN, MVT::f128, Expand);
      setOperationAction(ISD::FCOS, MVT::f128, Expand);
      setOperationAction(ISD::FPOW, MVT::f128, Expand);
      setOperationAction(ISD::FREM, MVT::f128, Expand);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      // Test data class instructions store results in CR bits.
      if (Subtarget.useCRBits()) {
      }

      // 128-bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      setOperationAction(ISD::FADD, MVT::f128, Legal);
      setOperationAction(ISD::FSUB, MVT::f128, Legal);
      setOperationAction(ISD::FDIV, MVT::f128, Legal);
      setOperationAction(ISD::FMUL, MVT::f128, Legal);

      setOperationAction(ISD::FMA, MVT::f128, Legal);

      setOperationAction(ISD::FRINT, MVT::f128, Legal);
      setOperationAction(ISD::FCEIL, MVT::f128, Legal);

      // Handle constrained floating-point operations of fp128.
      setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
      setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
      setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
      setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
    } else if (Subtarget.hasVSX()) {
      AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
      AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);

      // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
      // fp_to_uint and int_to_fp.

      setOperationAction(ISD::FMUL, MVT::f128, Expand);
      setOperationAction(ISD::FDIV, MVT::f128, Expand);
      setOperationAction(ISD::FNEG, MVT::f128, Expand);
      setOperationAction(ISD::FABS, MVT::f128, Expand);
      setOperationAction(ISD::FMA, MVT::f128, Expand);

      // Expand the fp_extend if the target type is fp128.

      // Expand the fp_round if the source type is fp128.
      for (MVT VT : {MVT::f32, MVT::f64}) {
      }

      // Lower the following f128 select_cc pattern:
      //   select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE

      // We need to handle f128 SELECT_CC with integer result type.
      setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
    }

    if (Subtarget.hasP9Altivec()) {
      if (Subtarget.isISA3_1()) {
      } else {
      }

      setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
      setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
      setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
      setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
    }

    if (Subtarget.hasP10Vector()) {
    }
  }


  if (Subtarget.pairedVectorMemops()) {
    addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
    setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
    setOperationAction(ISD::STORE, MVT::v256i1, Custom);
  }
  if (Subtarget.hasMMA()) {
    if (Subtarget.isISAFuture())
      addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
    else
      addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
    setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
    setOperationAction(ISD::STORE, MVT::v512i1, Custom);
  }

  if (Subtarget.has64BitSupport())

  if (Subtarget.isISA3_1())
    setOperationAction(ISD::SRA, MVT::v1i128, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
  }

  }

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
  }


  setLibcallName(RTLIB::MULO_I128, nullptr);
  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
    setLibcallName(RTLIB::MUL_I128, nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  else if (isPPC64)
  else

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific DAG combine patterns for the following nodes:
  if (Subtarget.hasFPCVT())
  if (Subtarget.useCRBits())

  if (Subtarget.useCRBits()) {
  }

1407
1408 setLibcallName(RTLIB::LOG_F128, "logf128");
1409 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1410 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1411 setLibcallName(RTLIB::EXP_F128, "expf128");
1412 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1413 setLibcallName(RTLIB::SIN_F128, "sinf128");
1414 setLibcallName(RTLIB::COS_F128, "cosf128");
1415 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1416 setLibcallName(RTLIB::POW_F128, "powf128");
1417 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1418 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1419 setLibcallName(RTLIB::REM_F128, "fmodf128");
1420 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1421 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1422 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1423 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1424 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1425 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1426 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1427 setLibcallName(RTLIB::RINT_F128, "rintf128");
1428 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1429 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1430 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1431 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1432 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1433
1434 if (Subtarget.isAIXABI()) {
1435 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1436 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1437 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1438 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1439 }
1440
  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
  }

  // TODO: The default entry number is set to 64. This stops most jump table
  // generation on PPC. But it is good for current PPC hardware because the
  // indirect branch (mtctr/bctr) used for jump tables may lead to bad branch
  // prediction. Re-evaluate this value on future hardware that can do better
  // with mtctr.

  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
    break;
  }

  if (Subtarget.enableMachineScheduler())
  else

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
      Subtarget.getCPUDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemcpy = 32;
  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
  }

  IsStrictFPEnabled = true;

  // Let the subtarget (CPU) decide if a predictable select is more expensive
  // than the corresponding branch. This information is used in CGP to decide
  // when to convert selects into branches.
}

// *********************************** NOTE ************************************
// For selecting load and store instructions, the addressing modes are defined
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
//
// The TD definitions for the addressing modes correspond to their respective
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
// address mode flags of a particular node. Afterwards, the computed address
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
// accordingly, based on the preferred addressing mode.
//
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
// MemOpFlags contains all the possible flags that can be used to compute the
// optimal addressing mode for load and store instructions.
// AddrMode contains all the possible load and store addressing modes available
// on Power (such as DForm, DSForm, DQForm, XForm, etc.).
//
// When adding new load and store instructions, it is possible that new address
// flags may need to be added into MemOpFlags, and a new addressing mode will
// need to be added to AddrMode. An entry for the new addressing mode
// (consisting of the minimal and main distinguishing address flags for the new
// load/store instructions) will need to be added into initializeAddrModeMap()
// below. Finally, when adding new addressing modes, getAddrModeForFlags() will
// need to be updated to account for selecting the optimal addressing mode.
// *****************************************************************************
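// A rough, illustrative correspondence between these addressing modes and
// assembly (simplified, and not an exhaustive description of the encodings):
//   D-Form:        lwz  r5, 16(r3)      base register + 16-bit signed immediate
//   DS-Form:       ld   r5, 16(r3)      immediate must be a multiple of 4
//   DQ-Form:       lxv  vs0, 16(r3)     immediate must be a multiple of 16
//   X-Form:        lwzx r5, r3, r4      base register + index register
//   Prefix D-Form: plwz r5, 65536(r3)   34-bit signed immediate (ISA 3.1)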
/// Initialize the map that relates the different addressing modes of the load
/// and store instructions to a set of flags. This ensures the load/store
/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
  AddrModesMap[PPC::AM_DForm] = {
      // LWZ, STW
      // LBZ, LHZ, STB, STH
      // LHA
      // LFS, LFD, STFS, STFD
  };
  AddrModesMap[PPC::AM_DSForm] = {
      // LWA
      // LD, STD
      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
  };
  AddrModesMap[PPC::AM_DQForm] = {
      // LXV, STXV
  };
  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
                                       PPC::MOF_SubtargetP10};
  // TODO: Add mapping for quadword load/store.
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
             MaxAlign < 16)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
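/// For example (illustrative): with Altivec available, a byval struct that
/// contains a 16-byte vector member is aligned to 16 bytes, while a struct of
/// plain scalars keeps the default 8-byte (PPC64) or 4-byte (PPC32) alignment.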
uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest is 8 bytes on PPC64 and 4 bytes on PPC32.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  if (Subtarget.hasAltivec())
    getMaxByValAlign(Ty, Alignment, Align(16));
  return Alignment.value();
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
    return false;

  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
    if (VTy->getScalarType()->isIntegerTy()) {
      // ElemSizeInBits of 8/16 can fit in the immediate field; not needed here.
      if (ElemSizeInBits == 32) {
        Index = Subtarget.isLittleEndian() ? 2 : 1;
        return true;
      }
      if (ElemSizeInBits == 64) {
        Index = Subtarget.isLittleEndian() ? 1 : 0;
        return true;
      }
    }
  }
  return false;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
  case PPCISD::XSMINC: return "PPCISD::XSMINC";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::FTSQRT: return "PPCISD::FTSQRT";
  case PPCISD::FSQRT: return "PPCISD::FSQRT";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
  case PPCISD::XXSPLTI_SP_TO_DP: return "PPCISD::XXSPLTI_SP_TO_DP";
  case PPCISD::XXSPLTI32DX: return "PPCISD::XXSPLTI32DX";
  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
  case PPCISD::XXPERM: return "PPCISD::XXPERM";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
  case PPCISD::CALL_RM: return "PPCISD::CALL_RM";
  case PPCISD::CALL_NOP_RM: return "PPCISD::CALL_NOP_RM";
  case PPCISD::CALL_NOTOC_RM: return "PPCISD::CALL_NOTOC_RM";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::BCTRL_RM: return "PPCISD::BCTRL_RM";
  case PPCISD::BCTRL_LOAD_TOC_RM: return "PPCISD::BCTRL_LOAD_TOC_RM";
  case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
  case PPCISD::ANDI_rec_1_EQ_BIT: return "PPCISD::ANDI_rec_1_EQ_BIT";
  case PPCISD::ANDI_rec_1_GT_BIT: return "PPCISD::ANDI_rec_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
  case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::PADDI_DTPREL: return "PPCISD::PADDI_DTPREL";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
  case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
  case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
  case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
  case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
  case PPCISD::STRICT_FADDRTZ: return "PPCISD::STRICT_FADDRTZ";
  case PPCISD::STRICT_FCTIDZ: return "PPCISD::STRICT_FCTIDZ";
  case PPCISD::STRICT_FCTIWZ: return "PPCISD::STRICT_FCTIWZ";
  case PPCISD::STRICT_FCTIDUZ: return "PPCISD::STRICT_FCTIDUZ";
  case PPCISD::STRICT_FCTIWUZ: return "PPCISD::STRICT_FCTIWUZ";
  case PPCISD::STRICT_FCFID: return "PPCISD::STRICT_FCFID";
  case PPCISD::STRICT_FCFIDU: return "PPCISD::STRICT_FCFIDU";
  case PPCISD::STRICT_FCFIDS: return "PPCISD::STRICT_FCFIDS";
  case PPCISD::STRICT_FCFIDUS: return "PPCISD::STRICT_FCFIDUS";
  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
  case PPCISD::STORE_COND: return "PPCISD::STORE_COND";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                   EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
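/// For example (illustrative only): with ShuffleKind 0 on a big-endian
/// target, the mask must be <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>
/// (modulo undefs), i.e. the odd-numbered (low-order) byte of every halfword
/// drawn from both input vectors.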
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
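/// For example (illustrative only): a big-endian VMRGL with UnitSize 1 and
/// two distinct inputs (ShuffleKind 0) matches the v16i8 mask
/// <8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31>, interleaving the low
/// halves of the two inputs byte by byte.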
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles.
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 * - Little Endian:
 *   - Use offset of 0 to check for odd elements
 *   - Use offset of 4 to check for even elements
 * - Big Endian:
 *   - Use offset of 0 to check for even elements
 *   - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little-endian
 * and big-endian targets can be found in
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * "Targeting your applications - what little endian and big endian IBM XL
 * C/C++ compiler differences mean to you."
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements
 * are numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, so each vector will contain 16 byte-sized
 * elements. More information on the shuffle vector can be found in the
 * shufflevector section of the Language Reference:
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 * - If the instruction uses the same vector for both inputs, the range of the
 *   indices will be 0 to 15. In this case, the RHSStart value passed should
 *   be 0.
 * - If the instruction has two different vectors then the range of the
 *   indices will be 0 to 31. In this case, the RHSStart value passed should
 *   be 16 (indices 0-15 specify elements in the first vector while indices 16
 *   to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand
 * input vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
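/*
 * For example (illustrative only): on a big-endian target, an even-word merge
 * of two distinct inputs (IndexOffset 0, RHSStartValue 16) matches the v16i8
 * mask <0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27>, i.e. words 0 and 2 of
 * each input.
 */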
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped
 *     for little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
 * instruction
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
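/// For example (illustrative only): on a big-endian target with two different
/// inputs (ShuffleKind 0), the mask <3,4,5,...,18> is matched with a shift
/// amount of 3.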
2160int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2161 SelectionDAG &DAG) {
2162 if (N->getValueType(0) != MVT::v16i8)
2163 return -1;
2164
2165 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2166
2167 // Find the first non-undef value in the shuffle mask.
2168 unsigned i;
2169 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2170 /*search*/;
2171
2172 if (i == 16) return -1; // all undef.
2173
2174 // Otherwise, check to see if the rest of the elements are consecutively
2175 // numbered from this value.
2176 unsigned ShiftAmt = SVOp->getMaskElt(i);
2177 if (ShiftAmt < i) return -1;
2178
2179 ShiftAmt -= i;
2180 bool isLE = DAG.getDataLayout().isLittleEndian();
2181
2182 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2183 // Check the rest of the elements to see if they are consecutive.
2184 for (++i; i != 16; ++i)
2185 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2186 return -1;
2187 } else if (ShuffleKind == 1) {
2188 // Check the rest of the elements to see if they are consecutive.
2189 for (++i; i != 16; ++i)
2190 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2191 return -1;
2192 } else
2193 return -1;
2194
2195 if (isLE)
2196 ShiftAmt = 16 - ShiftAmt;
2197
2198 return ShiftAmt;
2199}
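// Worked example (illustrative): on a big-endian target with ShuffleKind 0,
// the mask {5, 6, ..., 20} starts at element 5 and is consecutive, so this
// returns a shift amount of 5. With ShuffleKind 2 on little-endian, the same
// mask yields 16 - 5 = 11 because the input operands are swapped.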
2200
2201/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2202/// specifies a splat of a single element that is suitable for input to
2203/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2204bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2205 EVT VT = N->getValueType(0);
2206 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2207 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2208
2209 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2210 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2211
2212 // The consecutive indices need to specify an element, not part of two
2213 // different elements. So abandon ship early if this isn't the case.
2214 if (N->getMaskElt(0) % EltSize != 0)
2215 return false;
2216
2217 // This is a splat operation if each element of the permute is the same, and
2218 // if the value doesn't reference the second vector.
2219 unsigned ElementBase = N->getMaskElt(0);
2220
2221 // FIXME: Handle UNDEF elements too!
2222 if (ElementBase >= 16)
2223 return false;
2224
2225 // Check that the indices are consecutive, in the case of a multi-byte element
2226 // splatted with a v16i8 mask.
2227 for (unsigned i = 1; i != EltSize; ++i)
2228 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2229 return false;
2230
2231 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2232 if (N->getMaskElt(i) < 0) continue;
2233 for (unsigned j = 0; j != EltSize; ++j)
2234 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2235 return false;
2236 }
2237 return true;
2238}
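// Example (illustrative): with EltSize == 4, the mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// passes all of the checks above (mask element 0 is EltSize-aligned, bytes
// 4-7 are consecutive, and every later group repeats the first), so it is a
// splat of word 1 of the input vector.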
2239
2240/// Check that the mask is shuffling N byte elements. Within each N byte
2241/// element of the mask, the indices could be either in increasing or
2242/// decreasing order as long as they are consecutive.
2243/// \param[in] N the shuffle vector SD Node to analyze
2244/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2245/// Word/DoubleWord/QuadWord).
2246/// \param[in] StepLen the index delta between adjacent bytes within each
2247/// element: 1 if the mask indices are increasing, -1 if decreasing.
2248/// \return true iff the mask is shuffling N byte elements.
2249static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2250 int StepLen) {
2251 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2252 "Unexpected element width.");
2253 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2254
2255 unsigned NumOfElem = 16 / Width;
2256 unsigned MaskVal[16]; // Width is never greater than 16
2257 for (unsigned i = 0; i < NumOfElem; ++i) {
2258 MaskVal[0] = N->getMaskElt(i * Width);
2259 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2260 return false;
2261 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2262 return false;
2263 }
2264
2265 for (unsigned int j = 1; j < Width; ++j) {
2266 MaskVal[j] = N->getMaskElt(i * Width + j);
2267 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2268 return false;
2269 }
2270 }
2271 }
2272
2273 return true;
2274}
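// Example (illustrative): with Width == 8 and StepLen == 1, the mask
//   {8,9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23}
// is accepted, since each 8-byte group starts on an element boundary and its
// bytes increase consecutively; it shuffles doublewords 1 and 2.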
2275
2276bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2277 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2278 if (!isNByteElemShuffleMask(N, 4, 1))
2279 return false;
2280
2281 // Now we look at mask elements 0,4,8,12
2282 unsigned M0 = N->getMaskElt(0) / 4;
2283 unsigned M1 = N->getMaskElt(4) / 4;
2284 unsigned M2 = N->getMaskElt(8) / 4;
2285 unsigned M3 = N->getMaskElt(12) / 4;
2286 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2287 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2288
2289 // Below, let H and L be arbitrary elements of the shuffle mask
2290 // where H is in the range [4,7] and L is in the range [0,3].
2291 // H, 1, 2, 3 or L, 5, 6, 7
2292 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2293 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2294 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2295 InsertAtByte = IsLE ? 12 : 0;
2296 Swap = M0 < 4;
2297 return true;
2298 }
2299 // 0, H, 2, 3 or 4, L, 6, 7
2300 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2301 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2302 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2303 InsertAtByte = IsLE ? 8 : 4;
2304 Swap = M1 < 4;
2305 return true;
2306 }
2307 // 0, 1, H, 3 or 4, 5, L, 7
2308 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2309 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2310 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2311 InsertAtByte = IsLE ? 4 : 8;
2312 Swap = M2 < 4;
2313 return true;
2314 }
2315 // 0, 1, 2, H or 4, 5, 6, L
2316 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2317 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2318 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2319 InsertAtByte = IsLE ? 0 : 12;
2320 Swap = M3 < 4;
2321 return true;
2322 }
2323
2324 // If both vector operands for the shuffle are the same vector, the mask will
2325 // contain only elements from the first one and the second one will be undef.
2326 if (N->getOperand(1).isUndef()) {
2327 ShiftElts = 0;
2328 Swap = true;
2329 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2330 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2331 InsertAtByte = IsLE ? 12 : 0;
2332 return true;
2333 }
2334 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2335 InsertAtByte = IsLE ? 8 : 4;
2336 return true;
2337 }
2338 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2339 InsertAtByte = IsLE ? 4 : 8;
2340 return true;
2341 }
2342 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2343 InsertAtByte = IsLE ? 0 : 12;
2344 return true;
2345 }
2346 }
2347
2348 return false;
2349}
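// Example (illustrative): the word-level mask {7, 1, 2, 3} (M0 = 7, the rest
// identity) matches the first case above. On little-endian this produces
// ShiftElts = LittleEndianShifts[3] = 3, InsertAtByte = 12 and Swap = false,
// i.e. word 3 of the second input replaces word 0 of the result.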
2350
2351bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2352 bool &Swap, bool IsLE) {
2353 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2354 // Ensure each byte index of the word is consecutive.
2355 if (!isNByteElemShuffleMask(N, 4, 1))
2356 return false;
2357
2358 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2359 unsigned M0 = N->getMaskElt(0) / 4;
2360 unsigned M1 = N->getMaskElt(4) / 4;
2361 unsigned M2 = N->getMaskElt(8) / 4;
2362 unsigned M3 = N->getMaskElt(12) / 4;
2363
2364 // If both vector operands for the shuffle are the same vector, the mask will
2365 // contain only elements from the first one and the second one will be undef.
2366 if (N->getOperand(1).isUndef()) {
2367 assert(M0 < 4 && "Indexing into an undef vector?");
2368 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2369 return false;
2370
2371 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2372 Swap = false;
2373 return true;
2374 }
2375
2376 // Ensure each word index of the ShuffleVector Mask is consecutive.
2377 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2378 return false;
2379
2380 if (IsLE) {
2381 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2382 // Input vectors don't need to be swapped if the leading element
2383 // of the result is one of the 3 left elements of the second vector
2384 // (or if there is no shift to be done at all).
2385 Swap = false;
2386 ShiftElts = (8 - M0) % 8;
2387 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2388 // Input vectors need to be swapped if the leading element
2389 // of the result is one of the 3 left elements of the first vector
2390 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2391 Swap = true;
2392 ShiftElts = (4 - M0) % 4;
2393 }
2394
2395 return true;
2396 } else { // BE
2397 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2398 // Input vectors don't need to be swapped if the leading element
2399 // of the result is one of the 4 elements of the first vector.
2400 Swap = false;
2401 ShiftElts = M0;
2402 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2403 // Input vectors need to be swapped if the leading element
2404 // of the result is one of the 4 elements of the right vector.
2405 Swap = true;
2406 ShiftElts = M0 - 4;
2407 }
2408
2409 return true;
2410 }
2411}
2412
2413static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2414 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2415
2416 if (!isNByteElemShuffleMask(N, Width, -1))
2417 return false;
2418
2419 for (int i = 0; i < 16; i += Width)
2420 if (N->getMaskElt(i) != i + Width - 1)
2421 return false;
2422
2423 return true;
2424}
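// Example (illustrative): for Width == 4 the only mask that survives both
// checks is {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, each word's bytes
// reversed in place, which is the pattern implemented by xxbrw.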
2425
2426bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2427 return isXXBRShuffleMaskHelper(N, 2);
2428}
2429
2430bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2431 return isXXBRShuffleMaskHelper(N, 4);
2432}
2433
2434bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2435 return isXXBRShuffleMaskHelper(N, 8);
2436}
2437
2438bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2439 return isXXBRShuffleMaskHelper(N, 16);
2440}
2441
2442/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2443/// if the inputs to the instruction should be swapped and set \p DM to the
2444/// value for the immediate.
2445/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2446/// AND element 0 of the result comes from the first input (LE) or second input
2447/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2448/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2449/// mask.
2450bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2451 bool &Swap, bool IsLE) {
2452 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2453
2454 // Ensure each byte index of the double word is consecutive.
2455 if (!isNByteElemShuffleMask(N, 8, 1))
2456 return false;
2457
2458 unsigned M0 = N->getMaskElt(0) / 8;
2459 unsigned M1 = N->getMaskElt(8) / 8;
2460 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2461
2462 // If both vector operands for the shuffle are the same vector, the mask will
2463 // contain only elements from the first one and the second one will be undef.
2464 if (N->getOperand(1).isUndef()) {
2465 if ((M0 | M1) < 2) {
2466 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2467 Swap = false;
2468 return true;
2469 } else
2470 return false;
2471 }
2472
2473 if (IsLE) {
2474 if (M0 > 1 && M1 < 2) {
2475 Swap = false;
2476 } else if (M0 < 2 && M1 > 1) {
2477 M0 = (M0 + 2) % 4;
2478 M1 = (M1 + 2) % 4;
2479 Swap = true;
2480 } else
2481 return false;
2482
2483 // Note: if control flow comes here that means Swap is already set above
2484 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2485 return true;
2486 } else { // BE
2487 if (M0 < 2 && M1 > 1) {
2488 Swap = false;
2489 } else if (M0 > 1 && M1 < 2) {
2490 M0 = (M0 + 2) % 4;
2491 M1 = (M1 + 2) % 4;
2492 Swap = true;
2493 } else
2494 return false;
2495
2496 // Note: if control flow comes here that means Swap is already set above
2497 DM = (M0 << 1) + (M1 & 1);
2498 return true;
2499 }
2500}
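// Worked example (illustrative): byte mask {0..7, 24..31} gives M0 = 0 and
// M1 = 3 (doubleword 0 of the first input, doubleword 1 of the second). On a
// big-endian target this takes the M0 < 2 && M1 > 1 path, so Swap = false
// and DM = (0 << 1) + (3 & 1) = 1.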
2501
2502
2503/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2504/// appropriate for PPC mnemonics (which have a big endian bias - namely
2505/// elements are counted from the left of the vector register).
2506unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2507 SelectionDAG &DAG) {
2508 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2509 assert(isSplatShuffleMask(SVOp, EltSize));
2510 EVT VT = SVOp->getValueType(0);
2511
2512 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2513 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2514 : SVOp->getMaskElt(0);
2515
2516 if (DAG.getDataLayout().isLittleEndian())
2517 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2518 else
2519 return SVOp->getMaskElt(0) / EltSize;
2520}
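// Example (illustrative): for a v4i32 splat where mask element 0 is 4
// (EltSize == 4), a big-endian target returns 4 / 4 = 1 directly, while a
// little-endian target returns (16 / 4) - 1 - 1 = 2, since the mnemonics
// count elements from the left of the register.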
2521
2522/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2523/// by using a vspltis[bhw] instruction of the specified element size, return
2524/// the constant being splatted. The ByteSize field indicates the number of
2525/// bytes of each element [124] -> [bhw].
2526SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2527 SDValue OpVal;
2528
2529 // If ByteSize of the splat is bigger than the element size of the
2530 // build_vector, then we have a case where we are checking for a splat where
2531 // multiple elements of the buildvector are folded together into a single
2532 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2533 unsigned EltSize = 16/N->getNumOperands();
2534 if (EltSize < ByteSize) {
2535 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2536 SDValue UniquedVals[4];
2537 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2538
2539 // See if all of the elements in the buildvector agree across the vector.
2540 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2541 if (N->getOperand(i).isUndef()) continue;
2542 // If the element isn't a constant, bail out entirely.
2543 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2544
2545 if (!UniquedVals[i&(Multiple-1)].getNode())
2546 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2547 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2548 return SDValue(); // no match.
2549 }
2550
2551 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2552 // either constant or undef values that are identical for each chunk. See
2553 // if these chunks can form into a larger vspltis*.
2554
2555 // Check to see if all of the leading entries are either 0 or -1. If
2556 // neither, then this won't fit into the immediate field.
2557 bool LeadingZero = true;
2558 bool LeadingOnes = true;
2559 for (unsigned i = 0; i != Multiple-1; ++i) {
2560 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2561
2562 LeadingZero &= isNullConstant(UniquedVals[i]);
2563 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2564 }
2565 // Finally, check the least significant entry.
2566 if (LeadingZero) {
2567 if (!UniquedVals[Multiple-1].getNode())
2568 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2569 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2570 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2571 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2572 }
2573 if (LeadingOnes) {
2574 if (!UniquedVals[Multiple-1].getNode())
2575 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2576 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2577 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2578 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2579 }
2580
2581 return SDValue();
2582 }
2583
2584 // Check to see if this buildvec has a single non-undef value in its elements.
2585 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2586 if (N->getOperand(i).isUndef()) continue;
2587 if (!OpVal.getNode())
2588 OpVal = N->getOperand(i);
2589 else if (OpVal != N->getOperand(i))
2590 return SDValue();
2591 }
2592
2593 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2594
2595 unsigned ValSizeInBytes = EltSize;
2596 uint64_t Value = 0;
2597 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2598 Value = CN->getZExtValue();
2599 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2600 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2601 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2602 }
2603
2604 // If the splat value is larger than the element value, then we can never do
2605 // this splat. The only case that we could fit the replicated bits into our
2606 // immediate field for would be zero, and we prefer to use vxor for it.
2607 if (ValSizeInBytes < ByteSize) return SDValue();
2608
2609 // If the element value is larger than the splat value, check if it consists
2610 // of a repeated bit pattern of size ByteSize.
2611 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2612 return SDValue();
2613
2614 // Properly sign extend the value.
2615 int MaskVal = SignExtend32(Value, ByteSize * 8);
2616
2617 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2618 if (MaskVal == 0) return SDValue();
2619
2620 // Finally, if this value fits in a 5 bit sext field, return it
2621 if (SignExtend32<5>(MaskVal) == MaskVal)
2622 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2623 return SDValue();
2624}
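// Example (illustrative): a v4i32 build_vector splatting the constant -3,
// queried with ByteSize == 4, reaches the sign-extension check with
// MaskVal == -3; that fits in the 5-bit signed immediate field, so the
// constant is returned and the splat can be formed with vspltisw -3.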
2625
2626//===----------------------------------------------------------------------===//
2627// Addressing Mode Selection
2628//===----------------------------------------------------------------------===//
2629
2630/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2631/// or 64-bit immediate, and if the value can be accurately represented as a
2632/// sign extension from a 16-bit value. If so, this returns true and the
2633/// immediate.
2634bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2635 if (!isa<ConstantSDNode>(N))
2636 return false;
2637
2638 Imm = (int16_t)N->getAsZExtVal();
2639 if (N->getValueType(0) == MVT::i32)
2640 return Imm == (int32_t)N->getAsZExtVal();
2641 else
2642 return Imm == (int64_t)N->getAsZExtVal();
2643}
2644bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2645 return isIntS16Immediate(Op.getNode(), Imm);
2646}
2647
2648/// Used when computing address flags for selecting loads and stores.
2649/// If we have an OR, check if the LHS and RHS are provably disjoint.
2650/// An OR of two provably disjoint values is equivalent to an ADD.
2651/// Most PPC load/store instructions compute the effective address as a sum,
2652/// so doing this conversion is useful.
2653static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2654 if (N.getOpcode() != ISD::OR)
2655 return false;
2656 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2657 if (!LHSKnown.Zero.getBoolValue())
2658 return false;
2659 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2660 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2661}
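// Example (illustrative): for N = (or (shl X, 4), 3), the LHS has its low 4
// bits known zero and the RHS contributes only the low 2 bits, so every bit
// position is known zero on at least one side and the OR can be treated as
// an ADD when forming an effective address.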
2662
2663/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2664/// be represented as an indexed [r+r] operation.
2665bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2666 SDValue &Index,
2667 SelectionDAG &DAG) const {
2668 for (SDNode *U : N->uses()) {
2669 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2670 if (Memop->getMemoryVT() == MVT::f64) {
2671 Base = N.getOperand(0);
2672 Index = N.getOperand(1);
2673 return true;
2674 }
2675 }
2676 }
2677 return false;
2678}
2679
2680/// isIntS34Immediate - This method tests whether the value of the given node
2681/// can be accurately represented as a sign extension from a 34-bit value. If
2682/// so, this returns true and the immediate.
2683bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2684 if (!isa<ConstantSDNode>(N))
2685 return false;
2686
2687 Imm = (int64_t)N->getAsZExtVal();
2688 return isInt<34>(Imm);
2689}
2690bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2691 return isIntS34Immediate(Op.getNode(), Imm);
2692}
2693
2694/// SelectAddressRegReg - Given the specified address, check to see if it
2695/// can be represented as an indexed [r+r] operation. Returns false if it
2696/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2697/// non-zero and N can be represented by a base register plus a signed 16-bit
2698/// displacement, make a more precise judgement by checking (displacement % \p
2699/// EncodingAlignment).
2700bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2701 SDValue &Index, SelectionDAG &DAG,
2702 MaybeAlign EncodingAlignment) const {
2703 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2704 // a [pc+imm].
2705 if (SelectAddressPCRel(N, Base))
2706 return false;
2707
2708 int16_t Imm = 0;
2709 if (N.getOpcode() == ISD::ADD) {
2710 // SPE f64 load/store cannot handle a 16-bit offset; it only supports
2711 // 8-bit offsets, so check for the EVX reg+reg form first.
2712 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2713 return true;
2714 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2715 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2716 return false; // r+i
2717 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2718 return false; // r+i
2719
2720 Base = N.getOperand(0);
2721 Index = N.getOperand(1);
2722 return true;
2723 } else if (N.getOpcode() == ISD::OR) {
2724 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2725 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2726 return false; // Fold to r+i if we can.
2727
2728 // If this is an or of disjoint bitfields, we can codegen this as an add
2729 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2730 // disjoint.
2731 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2732
2733 if (LHSKnown.Zero.getBoolValue()) {
2734 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2735 // If all of the bits are known zero on the LHS or RHS, the add won't
2736 // carry.
2737 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2738 Base = N.getOperand(0);
2739 Index = N.getOperand(1);
2740 return true;
2741 }
2742 }
2743 }
2744
2745 return false;
2746}
2747
2748// If we happen to be doing an i64 load or store into a stack slot that has
2749// less than a 4-byte alignment, then the frame-index elimination may need to
2750// use an indexed load or store instruction (because the offset may not be a
2751// multiple of 4). The extra register needed to hold the offset comes from the
2752// register scavenger, and it is possible that the scavenger will need to use
2753// an emergency spill slot. As a result, we need to make sure that a spill slot
2754// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2755// stack slot.
2756static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2757 // FIXME: This does not handle the LWA case.
2758 if (VT != MVT::i64)
2759 return;
2760
2761 // NOTE: We'll exclude negative FIs here, which come from argument
2762 // lowering, because there are no known test cases triggering this problem
2763 // using packed structures (or similar). We can remove this exclusion if
2764 // we find such a test case. The reason why this is so test-case driven is
2765 // because this entire 'fixup' is only to prevent crashes (from the
2766 // register scavenger) on not-really-valid inputs. For example, if we have:
2767 // %a = alloca i1
2768 // %b = bitcast i1* %a to i64*
2769 // store i64 0, i64* %b
2770 // then the store should really be marked as 'align 1', but is not. If it
2771 // were marked as 'align 1' then the indexed form would have been
2772 // instruction-selected initially, and the problem this 'fixup' is preventing
2773 // won't happen regardless.
2774 if (FrameIdx < 0)
2775 return;
2776
2777 MachineFunction &MF = DAG.getMachineFunction();
2778 MachineFrameInfo &MFI = MF.getFrameInfo();
2779
2780 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2781 return;
2782
2783 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2784 FuncInfo->setHasNonRISpills();
2785}
2786
2787/// Returns true if the address N can be represented by a base register plus
2788/// a signed 16-bit displacement [r+imm], and if it is not better
2789/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2790/// displacements that are multiples of that value.
2791bool PPCTargetLowering::SelectAddressRegImm(
2792 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2793 MaybeAlign EncodingAlignment) const {
2794 // FIXME dl should come from parent load or store, not from address
2795 SDLoc dl(N);
2796
2797 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2798 // a [pc+imm].
2799 if (SelectAddressPCRel(N, Base))
2800 return false;
2801
2802 // If this can be more profitably realized as r+r, fail.
2803 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2804 return false;
2805
2806 if (N.getOpcode() == ISD::ADD) {
2807 int16_t imm = 0;
2808 if (isIntS16Immediate(N.getOperand(1), imm) &&
2809 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2810 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2811 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2812 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2813 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2814 } else {
2815 Base = N.getOperand(0);
2816 }
2817 return true; // [r+i]
2818 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2819 // Match LOAD (ADD (X, Lo(G))).
2820 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2821 "Cannot handle constant offsets yet!");
2822 Disp = N.getOperand(1).getOperand(0); // The global address.
2823 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2824 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2825 Disp.getOpcode() == ISD::TargetConstantPool ||
2826 Disp.getOpcode() == ISD::TargetJumpTable);
2827 Base = N.getOperand(0);
2828 return true; // [&g+r]
2829 }
2830 } else if (N.getOpcode() == ISD::OR) {
2831 int16_t imm = 0;
2832 if (isIntS16Immediate(N.getOperand(1), imm) &&
2833 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2834 // If this is an or of disjoint bitfields, we can codegen this as an add
2835 // (for better address arithmetic) if the LHS and RHS of the OR are
2836 // provably disjoint.
2837 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2838
2839 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2840 // If all of the bits are known zero on the LHS or RHS, the add won't
2841 // carry.
2842 if (FrameIndexSDNode *FI =
2843 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2844 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2845 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2846 } else {
2847 Base = N.getOperand(0);
2848 }
2849 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2850 return true;
2851 }
2852 }
2853 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2854 // Loading from a constant address.
2855
2856 // If this address fits entirely in a 16-bit sext immediate field, codegen
2857 // this as "d, 0"
2858 int16_t Imm;
2859 if (isIntS16Immediate(CN, Imm) &&
2860 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2861 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2862 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2863 CN->getValueType(0));
2864 return true;
2865 }
2866
2867 // Handle 32-bit sext immediates with LIS + addr mode.
2868 if ((CN->getValueType(0) == MVT::i32 ||
2869 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2870 (!EncodingAlignment ||
2871 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2872 int Addr = (int)CN->getZExtValue();
2873
2874 // Otherwise, break this down into an LIS + disp.
2875 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2876
2877 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2878 MVT::i32);
2879 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2880 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2881 return true;
2882 }
2883 }
2884
2885 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2886 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2887 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2888 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2889 } else
2890 Base = N;
2891 return true; // [r+0]
2892}
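// Example (illustrative): for N = (add X, 40) with an EncodingAlignment of 4
// (e.g. a DS-form access), 40 fits in a signed 16-bit field and is a
// multiple of 4, so this selects Disp = 40 and Base = X. An offset of 42
// would instead fail the alignment test and be selected as [r+r] by the
// SelectAddressRegReg check at the top.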
2893
2894/// Similar to the 16-bit case but for instructions that take a 34-bit
2895/// displacement field (prefixed loads/stores).
2896bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2897 SDValue &Base,
2898 SelectionDAG &DAG) const {
2899 // Only on 64-bit targets.
2900 if (N.getValueType() != MVT::i64)
2901 return false;
2902
2903 SDLoc dl(N);
2904 int64_t Imm = 0;
2905
2906 if (N.getOpcode() == ISD::ADD) {
2907 if (!isIntS34Immediate(N.getOperand(1), Imm))
2908 return false;
2909 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2910 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2911 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2912 else
2913 Base = N.getOperand(0);
2914 return true;
2915 }
2916
2917 if (N.getOpcode() == ISD::OR) {
2918 if (!isIntS34Immediate(N.getOperand(1), Imm))
2919 return false;
2920 // If this is an or of disjoint bitfields, we can codegen this as an add
2921 // (for better address arithmetic) if the LHS and RHS of the OR are
2922 // provably disjoint.
2923 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2924 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2925 return false;
2926 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2927 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2928 else
2929 Base = N.getOperand(0);
2930 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2931 return true;
2932 }
2933
2934 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2935 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2936 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2937 return true;
2938 }
2939
2940 return false;
2941}
2942
2943/// SelectAddressRegRegOnly - Given the specified address, force it to be
2944/// represented as an indexed [r+r] operation.
2945bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2946 SDValue &Index,
2947 SelectionDAG &DAG) const {
2948 // Check to see if we can easily represent this as an [r+r] address. This
2949 // will fail if it thinks that the address is more profitably represented as
2950 // reg+imm, e.g. where imm = 0.
2951 if (SelectAddressRegReg(N, Base, Index, DAG))
2952 return true;
2953
2954 // If the address is the result of an add, we will utilize the fact that the
2955 // address calculation includes an implicit add. However, we can reduce
2956 // register pressure if we do not materialize a constant just for use as the
2957 // index register. We only get rid of the add if it is not an add of a
2958 // value and a 16-bit signed constant and both have a single use.
2959 int16_t imm = 0;
2960 if (N.getOpcode() == ISD::ADD &&
2961 (!isIntS16Immediate(N.getOperand(1), imm) ||
2962 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2963 Base = N.getOperand(0);
2964 Index = N.getOperand(1);
2965 return true;
2966 }
2967
2968 // Otherwise, do it the hard way, using R0 as the base register.
2969 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2970 N.getValueType());
2971 Index = N;
2972 return true;
2973}
2974
2975template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2976 Ty *PCRelCand = dyn_cast<Ty>(N);
2977 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2978}
2979
2980/// Returns true if this address is a PC Relative address.
2981/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2982/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2983bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2984 // This is a materialize PC Relative node. Always select this as PC Relative.
2985 Base = N;
2986 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2987 return true;
2988 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2989 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2990 isValidPCRelNode<JumpTableSDNode>(N) ||
2991 isValidPCRelNode<BlockAddressSDNode>(N))
2992 return true;
2993 return false;
2994}
2995
2996/// Returns true if we should use a direct load into vector instruction
2997/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2998static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2999
3000 // If there are any other uses other than scalar to vector, then we should
3001 // keep it as a scalar load -> direct move pattern to prevent multiple
3002 // loads.
3003 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3004 if (!LD)
3005 return false;
3006
3007 EVT MemVT = LD->getMemoryVT();
3008 if (!MemVT.isSimple())
3009 return false;
3010 switch(MemVT.getSimpleVT().SimpleTy) {
3011 case MVT::i64:
3012 break;
3013 case MVT::i32:
3014 if (!ST.hasP8Vector())
3015 return false;
3016 break;
3017 case MVT::i16:
3018 case MVT::i8:
3019 if (!ST.hasP9Vector())
3020 return false;
3021 break;
3022 default:
3023 return false;
3024 }
3025
3026 SDValue LoadedVal(N, 0);
3027 if (!LoadedVal.hasOneUse())
3028 return false;
3029
3030 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3031 UI != UE; ++UI)
3032 if (UI.getUse().get().getResNo() == 0 &&
3033 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3034 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3035 return false;
3036
3037 return true;
3038}
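// Example (illustrative): an i64 load whose only user is a scalar_to_vector
// node passes the checks above, so the load is kept in a form that can
// become a single direct vector load (e.g. lxsd) instead of a GPR load
// followed by a direct move.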
3039
3040/// getPreIndexedAddressParts - returns true by value, base pointer and
3041/// offset pointer and addressing mode by reference if the node's address
3042/// can be legally represented as pre-indexed load / store address.
3043bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3044 SDValue &Offset,
3045 ISD::MemIndexedMode &AM,
3046 SelectionDAG &DAG) const {
3047 if (DisablePPCPreinc) return false;
3048
3049 bool isLoad = true;
3050 SDValue Ptr;
3051 EVT VT;
3052 Align Alignment;
3053 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3054 Ptr = LD->getBasePtr();
3055 VT = LD->getMemoryVT();
3056 Alignment = LD->getAlign();
3057 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3058 Ptr = ST->getBasePtr();
3059 VT = ST->getMemoryVT();
3060 Alignment = ST->getAlign();
3061 isLoad = false;
3062 } else
3063 return false;
3064
3065 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3066 // instructions because we can fold these into a more efficient instruction
3067 // instead (such as LXSD).
3068 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3069 return false;
3070 }
3071
3072 // PowerPC doesn't have preinc load/store instructions for vectors
3073 if (VT.isVector())
3074 return false;
3075
3076 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3077 // Common code will reject creating a pre-inc form if the base pointer
3078 // is a frame index, or if N is a store and the base pointer is either
3079 // the same as or a predecessor of the value being stored. Check for
3080 // those situations here, and try with swapped Base/Offset instead.
3081 bool Swap = false;
3082
3083 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3084 Swap = true;
3085 else if (!isLoad) {
3086 SDValue Val = cast<StoreSDNode>(N)->getValue();
3087 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3088 Swap = true;
3089 }
3090
3091 if (Swap)
3092 std::swap(Base, Offset);
3093
3094 AM = ISD::PRE_INC;
3095 return true;
3096 }
3097
3098 // LDU/STU can only handle immediates that are a multiple of 4.
3099 if (VT != MVT::i64) {
3100 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3101 return false;
3102 } else {
3103 // LDU/STU need an address with at least 4-byte alignment.
3104 if (Alignment < Align(4))
3105 return false;
3106
3107 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3108 return false;
3109 }
3110
3111 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3112 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3113 // sext i32 to i64 when addr mode is r+i.
3114 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3115 LD->getExtensionType() == ISD::SEXTLOAD &&
3116 isa<ConstantSDNode>(Offset))
3117 return false;
3118 }
3119
3120 AM = ISD::PRE_INC;
3121 return true;
3122}
3123
3124//===----------------------------------------------------------------------===//
3125// LowerOperation implementation
3126//===----------------------------------------------------------------------===//
3127
3128/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3129/// and LoOpFlags to the target MO flags.
3130static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3131 unsigned &HiOpFlags, unsigned &LoOpFlags,
3132 const GlobalValue *GV = nullptr) {
3133 HiOpFlags = PPCII::MO_HA;
3134 LoOpFlags = PPCII::MO_LO;
3135
3136 // Don't use the pic base if not in PIC relocation model.
3137 if (IsPIC) {
3138 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3139 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3140 }
3141}
3142
3143static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3144 SelectionDAG &DAG) {
3145 SDLoc DL(HiPart);
3146 EVT PtrVT = HiPart.getValueType();
3147 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3148
3149 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3150 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3151
3152 // With PIC, the first instruction is actually "GR+hi(&G)".
3153 if (isPIC)
3154 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3155 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3156
3157 // Generate non-pic code that has direct accesses to the constant pool.
3158 // The address of the global is just (hi(&g)+lo(&g)).
3159 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3160}
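// Illustratively, for a non-PIC 32-bit global G this materializes the
// address with the usual two-instruction sequence, roughly:
//   lis r3, G@ha ; PPCISD::Hi
//   addi r3, r3, G@l ; the final ADD combined with PPCISD::Lo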
3161
3162static void setUsesTOCBasePtr(MachineFunction &MF) {
3163 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3164 FuncInfo->setUsesTOCBasePtr();
3165}
3166
3167static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3168 setUsesTOCBasePtr(DAG.getMachineFunction());
3169}
3170
3171SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3172 SDValue GA) const {
3173 const bool Is64Bit = Subtarget.isPPC64();
3174 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3175 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3176 : Subtarget.isAIXABI()
3177 ? DAG.getRegister(PPC::R2, VT)
3178 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3179 SDValue Ops[] = { GA, Reg };
3180 return DAG.getMemIntrinsicNode(
3181 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3182 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3183 MachineMemOperand::MOLoad);
3184}
3185
3186SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3187 SelectionDAG &DAG) const {
3188 EVT PtrVT = Op.getValueType();
3189 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3190 const Constant *C = CP->getConstVal();
3191
3192 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3193 // The actual address of the GlobalValue is stored in the TOC.
3194 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3195 if (Subtarget.isUsingPCRelativeCalls()) {
3196 SDLoc DL(CP);
3197 EVT Ty = getPointerTy(DAG.getDataLayout());
3198 SDValue ConstPool = DAG.getTargetConstantPool(
3199 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3200 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3201 }
3202 setUsesTOCBasePtr(DAG);
3203 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3204 return getTOCEntry(DAG, SDLoc(CP), GA);
3205 }
3206
3207 unsigned MOHiFlag, MOLoFlag;
3208 bool IsPIC = isPositionIndependent();
3209 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3210
3211 if (IsPIC && Subtarget.isSVR4ABI()) {
3212 SDValue GA =
3213 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3214 return getTOCEntry(DAG, SDLoc(CP), GA);
3215 }
3216
3217 SDValue CPIHi =
3218 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3219 SDValue CPILo =
3220 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3221 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3222}
3223
3224// For 64-bit PowerPC, prefer the more compact relative encodings.
3225// This trades 32 bits per jump table entry for one or two instructions
3226// at the jump site.
3227unsigned PPCTargetLowering::getJumpTableEncoding() const {
3228 if (isJumpTableRelative())
3229 return MachineJumpTableInfo::EK_LabelDifference32;
3230
3231 return TargetLowering::getJumpTableEncoding();
3232}
3233
3234bool PPCTargetLowering::isJumpTableRelative() const {
3235 if (UseAbsoluteJumpTables)
3236 return false;
3237 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3238 return true;
3239 return TargetLowering::isJumpTableRelative();
3240}
3241
3242SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3243 SelectionDAG &DAG) const {
3244 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3245 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3246
3247 switch (getTargetMachine().getCodeModel()) {
3248 case CodeModel::Small:
3249 case CodeModel::Medium:
3250 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3251 default:
3252 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3253 getPointerTy(DAG.getDataLayout()));
3254 }
3255}
3256
3257const MCExpr *
3259 unsigned JTI,
3260 MCContext &Ctx) const {
3261 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3262 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3263
3264 switch (getTargetMachine().getCodeModel()) {
3265 case CodeModel::Small:
3266 case CodeModel::Medium:
3267 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3268 default:
3269 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3270 }
3271}
3272
3273SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3274 EVT PtrVT = Op.getValueType();
3275 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3276
3277 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3278 if (Subtarget.isUsingPCRelativeCalls()) {
3279 SDLoc DL(JT);
3280 EVT Ty = getPointerTy(DAG.getDataLayout());
3281 SDValue GA =
3282 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3283 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3284 return MatAddr;
3285 }
3286
3287 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3288 // The actual address of the GlobalValue is stored in the TOC.
3289 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3290 setUsesTOCBasePtr(DAG);
3291 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3292 return getTOCEntry(DAG, SDLoc(JT), GA);
3293 }
3294
3295 unsigned MOHiFlag, MOLoFlag;
3296 bool IsPIC = isPositionIndependent();
3297 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3298
3299 if (IsPIC && Subtarget.isSVR4ABI()) {
3300 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3301 PPCII::MO_PIC_FLAG);
3302 return getTOCEntry(DAG, SDLoc(GA), GA);
3303 }
3304
3305 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3306 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3307 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3308}
3309
3310SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3311 SelectionDAG &DAG) const {
3312 EVT PtrVT = Op.getValueType();
3313 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3314 const BlockAddress *BA = BASDN->getBlockAddress();
3315
3316 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3317 if (Subtarget.isUsingPCRelativeCalls()) {
3318 SDLoc DL(BASDN);
3319 EVT Ty = getPointerTy(DAG.getDataLayout());
3320 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3321 PPCII::MO_PCREL_FLAG);
3322 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3323 return MatAddr;
3324 }
3325
3326 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3327 // The actual BlockAddress is stored in the TOC.
3328 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3329 setUsesTOCBasePtr(DAG);
3330 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3331 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3332 }
3333
3334 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3335 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3336 return getTOCEntry(
3337 DAG, SDLoc(BASDN),
3338 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3339
3340 unsigned MOHiFlag, MOLoFlag;
3341 bool IsPIC = isPositionIndependent();
3342 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3343 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3344 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3345 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3346}
3347
3348SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3349 SelectionDAG &DAG) const {
3350 if (Subtarget.isAIXABI())
3351 return LowerGlobalTLSAddressAIX(Op, DAG);
3352
3353 return LowerGlobalTLSAddressLinux(Op, DAG);
3354}
3355
3356SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3357 SelectionDAG &DAG) const {
3358 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3359
3360 if (DAG.getTarget().useEmulatedTLS())
3361 report_fatal_error("Emulated TLS is not yet supported on AIX");
3362
3363 SDLoc dl(GA);
3364 const GlobalValue *GV = GA->getGlobal();
3365 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3366 bool Is64Bit = Subtarget.isPPC64();
3367 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3368 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3369 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3370
3371 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3372 SDValue VariableOffsetTGA =
3373 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3374 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3375 SDValue TLSReg;
3376 if (Is64Bit) {
3377 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3378 // involves a load of the variable offset (from the TOC), followed by an
3379 // add of the loaded variable offset to R13 (the thread pointer).
3380 // This code sequence looks like:
3381 // ld reg1,var[TC](2)
3382 // add reg2, reg1, r13 // r13 contains the thread pointer
3383 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3384
3385 // With the -maix-small-local-exec-tls option, produce a faster access
3386 // sequence for local-exec TLS variables where the offset from the TLS
3387 // base is encoded as an immediate operand.
3388 //
3389 // We only utilize the faster local-exec access sequence when the TLS
3390 // variable has a size within the policy limit. We treat types that are
3391 // not sized or are empty as being over the policy size limit.
3392 if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) {
3393 Type *GVType = GV->getValueType();
3394 if (GVType->isSized() && !GVType->isEmptyTy() &&
3395 GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
3396 AIXSmallTlsPolicySizeLimit)
3397 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3398 }
3399 } else {
3400 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3401 // involves loading the variable offset from the TOC, generating a call to
3402 // .__get_tpointer to get the thread pointer (which will be in R3), and
3403 // adding the two together:
3404 // lwz reg1,var[TC](2)
3405 // bla .__get_tpointer
3406 // add reg2, reg1, r3
3407 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3408
3409 // We do not implement the 32-bit version of the faster access sequence
3410 // for local-exec that is controlled by -maix-small-local-exec-tls.
3411 if (HasAIXSmallLocalExecTLS)
3412 report_fatal_error("The small-local-exec TLS access sequence is "
3413 "currently only supported on AIX (64-bit mode).");
3414 }
3415 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3416 }
3417
3418 // Only Local-Exec, Initial-Exec and General-Dynamic TLS models are currently
3419 // supported models. If Local- or Initial-exec are not possible or specified,
3420 // all GlobalTLSAddress nodes are lowered using the general-dynamic model.
3421 // We need to generate two TOC entries, one for the variable offset, one for
3422 // the region handle. The global address for the TOC entry of the region
3423 // handle is created with the MO_TLSGDM_FLAG flag and the global address
3424 // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3425 SDValue VariableOffsetTGA =
3426 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3427 SDValue RegionHandleTGA =
3428 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3429 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3430 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3431 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3432 RegionHandle);
3433}
3434
3435SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3436 SelectionDAG &DAG) const {
3437 // FIXME: TLS addresses currently use medium model code sequences,
3438 // which is the most useful form. Eventually support for small and
3439 // large models could be added if users need it, at the cost of
3440 // additional complexity.
3441 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3442 if (DAG.getTarget().useEmulatedTLS())
3443 return LowerToTLSEmulatedModel(GA, DAG);
3444
3445 SDLoc dl(GA);
3446 const GlobalValue *GV = GA->getGlobal();
3447 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3448 bool is64bit = Subtarget.isPPC64();
3449 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3450 PICLevel::Level picLevel = M->getPICLevel();
3451
3452 const TargetMachine &TM = getTargetMachine();
3453 TLSModel::Model Model = TM.getTLSModel(GV);
3454
3455 if (Model == TLSModel::LocalExec) {
3456 if (Subtarget.isUsingPCRelativeCalls()) {
3457 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3458 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3459 PPCII::MO_TPREL_PCREL_FLAG);
3460 SDValue MatAddr =
3461 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3462 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3463 }
3464
3465 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3466 PPCII::MO_TPREL_HA);
3467 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3468 PPCII::MO_TPREL_LO);
3469 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3470 : DAG.getRegister(PPC::R2, MVT::i32);
3471
3472 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3473 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3474 }
3475
3476 if (Model == TLSModel::InitialExec) {
3477 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3478 SDValue TGA = DAG.getTargetGlobalAddress(
3479 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3480 SDValue TGATLS = DAG.getTargetGlobalAddress(
3481 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3482 SDValue TPOffset;
3483 if (IsPCRel) {
3484 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3485 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3486 MachinePointerInfo());
3487 } else {
3488 SDValue GOTPtr;
3489 if (is64bit) {
3490 setUsesTOCBasePtr(DAG);
3491 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3492 GOTPtr =
3493 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3494 } else {
3495 if (!TM.isPositionIndependent())
3496 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3497 else if (picLevel == PICLevel::SmallPIC)
3498 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3499 else
3500 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3501 }
3502 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3503 }
3504 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3505 }
3506
3507 if (Model == TLSModel::GeneralDynamic) {
3508 if (Subtarget.isUsingPCRelativeCalls()) {
3509 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3510 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3511 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3512 }
3513
3514 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3515 SDValue GOTPtr;
3516 if (is64bit) {
3517 setUsesTOCBasePtr(DAG);
3518 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3519 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3520 GOTReg, TGA);
3521 } else {
3522 if (picLevel == PICLevel::SmallPIC)
3523 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3524 else
3525 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3526 }
3527 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3528 GOTPtr, TGA, TGA);
3529 }
3530
3531 if (Model == TLSModel::LocalDynamic) {
3532 if (Subtarget.isUsingPCRelativeCalls()) {
3533 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3534 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3535 SDValue MatPCRel =
3536 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3537 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3538 }
3539
3540 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3541 SDValue GOTPtr;
3542 if (is64bit) {
3543 setUsesTOCBasePtr(DAG);
3544 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3545 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3546 GOTReg, TGA);
3547 } else {
3548 if (picLevel == PICLevel::SmallPIC)
3549 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3550 else
3551 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3552 }
3553 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3554 PtrVT, GOTPtr, TGA, TGA);
3555 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3556 PtrVT, TLSAddr, TGA);
3557 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3558 }
3559
3560 llvm_unreachable("Unknown TLS model!");
3561}
3562
3563SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3564 SelectionDAG &DAG) const {
3565 EVT PtrVT = Op.getValueType();
3566 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3567 SDLoc DL(GSDN);
3568 const GlobalValue *GV = GSDN->getGlobal();
3569
3570 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3571 // The actual address of the GlobalValue is stored in the TOC.
3572 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3573 if (Subtarget.isUsingPCRelativeCalls()) {
3574 EVT Ty = getPointerTy(DAG.getDataLayout());
3575 if (isAccessedAsGotIndirect(Op)) {
3576 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3577 PPCII::MO_GOT_PCREL_FLAG);
3578 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3579 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3580 MachinePointerInfo());
3581 return Load;
3582 } else {
3583 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3584 PPCII::MO_PCREL_FLAG);
3585 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3586 }
3587 }
3588 setUsesTOCBasePtr(DAG);
3589 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3590 return getTOCEntry(DAG, DL, GA);
3591 }
3592
3593 unsigned MOHiFlag, MOLoFlag;
3594 bool IsPIC = isPositionIndependent();
3595 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3596
3597 if (IsPIC && Subtarget.isSVR4ABI()) {
3598 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3599 GSDN->getOffset(),
3600 PPCII::MO_PIC_FLAG);
3601 return getTOCEntry(DAG, DL, GA);
3602 }
3603
3604 SDValue GAHi =
3605 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3606 SDValue GALo =
3607 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3608
3609 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3610}
3611
3612SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3613 bool IsStrict = Op->isStrictFPOpcode();
3614 ISD::CondCode CC =
3615 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3616 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3617 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3618 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3619 EVT LHSVT = LHS.getValueType();
3620 SDLoc dl(Op);
3621
3622 // Soften the setcc with libcall if it is fp128.
3623 if (LHSVT == MVT::f128) {
3624 assert(!Subtarget.hasP9Vector() &&
3625 "SETCC for f128 is already legal under Power9!");
3626 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3627 Op->getOpcode() == ISD::STRICT_FSETCCS);
3628 if (RHS.getNode())
3629 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3630 DAG.getCondCode(CC));
3631 if (IsStrict)
3632 return DAG.getMergeValues({LHS, Chain}, dl);
3633 return LHS;
3634 }
3635
3636 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3637
3638 if (Op.getValueType() == MVT::v2i64) {
3639 // When the operands themselves are v2i64 values, we need to do something
3640 // special because VSX has no underlying comparison operations for these.
3641 if (LHS.getValueType() == MVT::v2i64) {
3642 // Equality can be handled by casting to the legal type for Altivec
3643 // comparisons, everything else needs to be expanded.
3644 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3645 return SDValue();
3646 SDValue SetCC32 = DAG.getSetCC(
3647 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3648 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3649 int ShuffV[] = {1, 0, 3, 2};
3650 SDValue Shuff =
3651 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3652 return DAG.getBitcast(MVT::v2i64,
3653 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3654 dl, MVT::v4i32, Shuff, SetCC32));
3655 }
3656
3657 // We handle most of these in the usual way.
3658 return Op;
3659 }
3660
3661 // If we're comparing for equality to zero, expose the fact that this is
3662 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3663 // fold the new nodes.
3664 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3665 return V;
3666
3667 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3668 // Leave comparisons against 0 and -1 alone for now, since they're usually
3669 // optimized. FIXME: revisit this when we can custom lower all setcc
3670 // optimizations.
3671 if (C->isAllOnes() || C->isZero())
3672 return SDValue();
3673 }
3674
3675 // If we have an integer seteq/setne, turn it into a compare against zero
3676 // by xor'ing the rhs with the lhs, which is faster than setting a
3677 // condition register, reading it back out, and masking the correct bit. The
3678 // normal approach here uses sub to do this instead of xor. Using xor exposes
3679 // the result to other bit-twiddling opportunities.
3680 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3681 EVT VT = Op.getValueType();
3682 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3683 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3684 }
3685 return SDValue();
3686}
3687
3688SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3689 SDNode *Node = Op.getNode();
3690 EVT VT = Node->getValueType(0);
3691 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3692 SDValue InChain = Node->getOperand(0);
3693 SDValue VAListPtr = Node->getOperand(1);
3694 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3695 SDLoc dl(Node);
3696
3697 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3698
3699 // gpr_index
3700 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3701 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3702 InChain = GprIndex.getValue(1);
3703
3704 if (VT == MVT::i64) {
3705 // Check if GprIndex is even
3706 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3707 DAG.getConstant(1, dl, MVT::i32));
3708 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3709 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3710 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3711 DAG.getConstant(1, dl, MVT::i32));
3712 // Align GprIndex to be even if it isn't
3713 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3714 GprIndex);
3715 }
3716
3717 // fpr index is 1 byte after gpr
3718 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3719 DAG.getConstant(1, dl, MVT::i32));
3720
3721 // fpr
3722 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3723 FprPtr, MachinePointerInfo(SV), MVT::i8);
3724 InChain = FprIndex.getValue(1);
3725
3726 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3727 DAG.getConstant(8, dl, MVT::i32));
3728
3729 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3730 DAG.getConstant(4, dl, MVT::i32));
3731
3732 // areas
3733 SDValue OverflowArea =
3734 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3735 InChain = OverflowArea.getValue(1);
3736
3737 SDValue RegSaveArea =
3738 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3739 InChain = RegSaveArea.getValue(1);
3740
3741 // select overflow_area if index >= 8
3742 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3743 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3744
3745 // adjustment constant gpr_index * 4/8
3746 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3747 VT.isInteger() ? GprIndex : FprIndex,
3748 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3749 MVT::i32));
3750
3751 // OurReg = RegSaveArea + RegConstant
3752 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3753 RegConstant);
3754
3755 // Floating types are 32 bytes into RegSaveArea
3756 if (VT.isFloatingPoint())
3757 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3758 DAG.getConstant(32, dl, MVT::i32));
3759
3760 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3761 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3762 VT.isInteger() ? GprIndex : FprIndex,
3763 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3764 MVT::i32));
3765
3766 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3767 VT.isInteger() ? VAListPtr : FprPtr,
3768 MachinePointerInfo(SV), MVT::i8);
3769
3770 // determine if we should load from reg_save_area or overflow_area
3771 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3772
3773 // increase overflow_area by 4/8 if gpr/fpr > 8
3774 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3775 DAG.getConstant(VT.isInteger() ? 4 : 8,
3776 dl, MVT::i32));
3777
3778 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3779 OverflowAreaPlusN);
3780
3781 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3782 MachinePointerInfo(), MVT::i32);
3783
3784 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3785}
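// In C terms, the DAG built above behaves roughly as follows for an integer
// va_arg (the FPR case uses byte 1, adds 32 to reg_save_area, and steps by 8):
//   unsigned char gpr = va->gpr;
//   char *addr = gpr < 8 ? va->reg_save_area + 4 * gpr
//                        : va->overflow_arg_area;
//   va->gpr = gpr + 1;                        // index stored unconditionally
//   if (gpr >= 8) va->overflow_arg_area += 4; // bump overflow area instead
//   result = *(int *)addr;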
3786
3787SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3788 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3789
3790 // We have to copy the entire va_list struct:
3791 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3792 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3793 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3794 false, true, false, MachinePointerInfo(),
3795 MachinePointerInfo());
3796}
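// The 12 bytes break down by field offset, matching LowerVAARG above:
// gpr at 0, fpr at 1, two bytes of padding, overflow_arg_area at 4,
// reg_save_area at 8; 1 + 1 + 2 + 4 + 4 == 12.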
3797
3798SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3799 SelectionDAG &DAG) const {
3800 if (Subtarget.isAIXABI())
3801 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3802
3803 return Op.getOperand(0);
3804}
3805
3806SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3807 MachineFunction &MF = DAG.getMachineFunction();
3808 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3809
3810 assert((Op.getOpcode() == ISD::INLINEASM ||
3811 Op.getOpcode() == ISD::INLINEASM_BR) &&
3812 "Expecting Inline ASM node.");
3813
3814 // If an LR store is already known to be required then there is no point in
3815 // checking this ASM as well.
3816 if (MFI.isLRStoreRequired())
3817 return Op;
3818
3819 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3820 // type MVT::Glue. We want to ignore this last operand if that is the case.
3821 unsigned NumOps = Op.getNumOperands();
3822 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3823 --NumOps;
3824
3825 // Check all operands that may contain the LR.
3826 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3827 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3828 unsigned NumVals = Flags.getNumOperandRegisters();
3829 ++i; // Skip the ID value.
3830
3831 switch (Flags.getKind()) {
3832 default:
3833 llvm_unreachable("Bad flags!");
3834 case InlineAsm::Kind::RegUse:
3835 case InlineAsm::Kind::Imm:
3836 case InlineAsm::Kind::Mem:
3837 i += NumVals;
3838 break;
3839 case InlineAsm::Kind::Clobber:
3840 case InlineAsm::Kind::RegDef:
3841 case InlineAsm::Kind::RegDefEarlyClobber: {
3842 for (; NumVals; --NumVals, ++i) {
3843 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3844 if (Reg != PPC::LR && Reg != PPC::LR8)
3845 continue;
3846 MFI.setLRStoreRequired();
3847 return Op;
3848 }
3849 break;
3850 }
3851 }
3852 }
3853
3854 return Op;
3855}
3856
3857SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3858 SelectionDAG &DAG) const {
3859 if (Subtarget.isAIXABI())
3860 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3861
3862 SDValue Chain = Op.getOperand(0);
3863 SDValue Trmp = Op.getOperand(1); // trampoline
3864 SDValue FPtr = Op.getOperand(2); // nested function
3865 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3866 SDLoc dl(Op);
3867
3868 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3869 bool isPPC64 = (PtrVT == MVT::i64);
3870 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3871
3872 TargetLowering::ArgListTy Args;
3873 TargetLowering::ArgListEntry Entry;
3874
3875 Entry.Ty = IntPtrTy;
3876 Entry.Node = Trmp; Args.push_back(Entry);
3877
3878 // TrampSize == (isPPC64 ? 48 : 40);
3879 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3880 isPPC64 ? MVT::i64 : MVT::i32);
3881 Args.push_back(Entry);
3882
3883 Entry.Node = FPtr; Args.push_back(Entry);
3884 Entry.Node = Nest; Args.push_back(Entry);
3885
3886 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3887 TargetLowering::CallLoweringInfo CLI(DAG);
3888 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3889 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3890 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3891
3892 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3893 return CallResult.second;
3894}
3895
3896SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3897 MachineFunction &MF = DAG.getMachineFunction();
3898 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3899 EVT PtrVT = getPointerTy(MF.getDataLayout());
3900
3901 SDLoc dl(Op);
3902
3903 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3904 // vastart just stores the address of the VarArgsFrameIndex slot into the
3905 // memory location argument.
3906 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3907 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3908 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3909 MachinePointerInfo(SV));
3910 }
3911
3912 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3913 // We suppose the given va_list is already allocated.
3914 //
3915 // typedef struct {
3916 // char gpr; /* index into the array of 8 GPRs
3917 // * stored in the register save area
3918 // * gpr=0 corresponds to r3,
3919 // * gpr=1 to r4, etc.
3920 // */
3921 // char fpr; /* index into the array of 8 FPRs
3922 // * stored in the register save area
3923 // * fpr=0 corresponds to f1,
3924 // * fpr=1 to f2, etc.
3925 // */
3926 // char *overflow_arg_area;
3927 // /* location on stack that holds
3928 // * the next overflow argument
3929 // */
3930 // char *reg_save_area;
3931 // /* where r3:r10 and f1:f8 (if saved)
3932 // * are stored
3933 // */
3934 // } va_list[1];
3935
3936 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3937 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3938 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3939 PtrVT);
3940 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3941 PtrVT);
3942
3943 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3944 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3945
3946 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3947 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3948
3949 uint64_t FPROffset = 1;
3950 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3951
3952 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3953
3954 // Store first byte : number of int regs
3955 SDValue firstStore =
3956 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3957 MachinePointerInfo(SV), MVT::i8);
3958 uint64_t nextOffset = FPROffset;
3959 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3960 ConstFPROffset);
3961
3962 // Store second byte : number of float regs
3963 SDValue secondStore =
3964 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3965 MachinePointerInfo(SV, nextOffset), MVT::i8);
3966 nextOffset += StackOffset;
3967 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3968
3969 // Store second word : arguments given on stack
3970 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3971 MachinePointerInfo(SV, nextOffset));
3972 nextOffset += FrameOffset;
3973 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3974
3975 // Store third word : arguments given in registers
3976 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3977 MachinePointerInfo(SV, nextOffset));
3978}
3979
3980/// FPR - The set of FP registers that should be allocated for arguments
3981/// on Darwin and AIX.
3982static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3983 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3984 PPC::F11, PPC::F12, PPC::F13};
3985
3986/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3987/// the stack.
3988static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3989 unsigned PtrByteSize) {
3990 unsigned ArgSize = ArgVT.getStoreSize();
3991 if (Flags.isByVal())
3992 ArgSize = Flags.getByValSize();
3993
3994 // Round up to multiples of the pointer size, except for array members,
3995 // which are always packed.
3996 if (!Flags.isInConsecutiveRegs())
3997 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3998
3999 return ArgSize;
4000}
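// For example, with PtrByteSize == 8 a 13-byte byval argument reserves
// ((13 + 7) / 8) * 8 == 16 bytes, while a 4-byte f32 member of a float
// array (isInConsecutiveRegs) stays packed at 4 bytes.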
4001
4002/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4003/// on the stack.
4004 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4005 ISD::ArgFlagsTy Flags,
4006 unsigned PtrByteSize) {
4007 Align Alignment(PtrByteSize);
4008
4009 // Altivec parameters are padded to a 16 byte boundary.
4010 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4011 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4012 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4013 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4014 Alignment = Align(16);
4015
4016 // ByVal parameters are aligned as requested.
4017 if (Flags.isByVal()) {
4018 auto BVAlign = Flags.getNonZeroByValAlign();
4019 if (BVAlign > PtrByteSize) {
4020 if (BVAlign.value() % PtrByteSize != 0)
4022 "ByVal alignment is not a multiple of the pointer size");
4023
4024 Alignment = BVAlign;
4025 }
4026 }
4027
4028 // Array members are always packed to their original alignment.
4029 if (Flags.isInConsecutiveRegs()) {
4030 // If the array member was split into multiple registers, the first
4031 // needs to be aligned to the size of the full type. (Except for
4032 // ppcf128, which is only aligned as its f64 components.)
4033 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4034 Alignment = Align(OrigVT.getStoreSize());
4035 else
4036 Alignment = Align(ArgVT.getStoreSize());
4037 }
4038
4039 return Alignment;
4040}
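// Examples: an i64 gets Align(8); any of the vector types above gets
// Align(16); and the first piece of an i128 split into two i64 registers
// is aligned to Align(16), the store size of the full original type.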
4041
4042/// CalculateStackSlotUsed - Return whether this argument will use its
4043/// stack slot (instead of being passed in registers). ArgOffset,
4044/// AvailableFPRs, and AvailableVRs must hold the current argument
4045/// position, and will be updated to account for this argument.
4046static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4047 unsigned PtrByteSize, unsigned LinkageSize,
4048 unsigned ParamAreaSize, unsigned &ArgOffset,
4049 unsigned &AvailableFPRs,
4050 unsigned &AvailableVRs) {
4051 bool UseMemory = false;
4052
4053 // Respect alignment of argument on the stack.
4054 Align Alignment =
4055 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4056 ArgOffset = alignTo(ArgOffset, Alignment);
4057 // If there's no space left in the argument save area, we must
4058 // use memory (this check also catches zero-sized arguments).
4059 if (ArgOffset >= LinkageSize + ParamAreaSize)
4060 UseMemory = true;
4061
4062 // Allocate argument on the stack.
4063 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4064 if (Flags.isInConsecutiveRegsLast())
4065 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4066 // If we overran the argument save area, we must use memory
4067 // (this check catches arguments passed partially in memory)
4068 if (ArgOffset > LinkageSize + ParamAreaSize)
4069 UseMemory = true;
4070
4071 // However, if the argument is actually passed in an FPR or a VR,
4072 // we don't use memory after all.
4073 if (!Flags.isByVal()) {
4074 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4075 if (AvailableFPRs > 0) {
4076 --AvailableFPRs;
4077 return false;
4078 }
4079 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4080 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4081 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4082 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4083 if (AvailableVRs > 0) {
4084 --AvailableVRs;
4085 return false;
4086 }
4087 }
4088
4089 return UseMemory;
4090}
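// For example, on ELFv2 (LinkageSize 32, ParamAreaSize 64) the 14th
// consecutive f64 argument finds all 13 FPRs taken and arrives at
// ArgOffset 136 > 96, so this returns true: the value lives in the
// parameter save area.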
4091
4092/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4093/// ensure minimum alignment required for target.
4094 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4095 unsigned NumBytes) {
4096 return alignTo(NumBytes, Lowering->getStackAlign());
4097}
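// E.g. a 16-byte stack alignment rounds a 100-byte frame up to 112 bytes.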
4098
4099SDValue PPCTargetLowering::LowerFormalArguments(
4100 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4101 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4102 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4103 if (Subtarget.isAIXABI())
4104 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4105 InVals);
4106 if (Subtarget.is64BitELFABI())
4107 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4108 InVals);
4109 assert(Subtarget.is32BitELFABI());
4110 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4111 InVals);
4112}
4113
4114SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4115 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4116 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4117 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4118
4119 // 32-bit SVR4 ABI Stack Frame Layout:
4120 // +-----------------------------------+
4121 // +--> | Back chain |
4122 // | +-----------------------------------+
4123 // | | Floating-point register save area |
4124 // | +-----------------------------------+
4125 // | | General register save area |
4126 // | +-----------------------------------+
4127 // | | CR save word |
4128 // | +-----------------------------------+
4129 // | | VRSAVE save word |
4130 // | +-----------------------------------+
4131 // | | Alignment padding |
4132 // | +-----------------------------------+
4133 // | | Vector register save area |
4134 // | +-----------------------------------+
4135 // | | Local variable space |
4136 // | +-----------------------------------+
4137 // | | Parameter list area |
4138 // | +-----------------------------------+
4139 // | | LR save word |
4140 // | +-----------------------------------+
4141 // SP--> +--- | Back chain |
4142 // +-----------------------------------+
4143 //
4144 // Specifications:
4145 // System V Application Binary Interface PowerPC Processor Supplement
4146 // AltiVec Technology Programming Interface Manual
4147
4148 MachineFunction &MF = DAG.getMachineFunction();
4149 MachineFrameInfo &MFI = MF.getFrameInfo();
4150 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4151
4152 EVT PtrVT = getPointerTy(MF.getDataLayout());
4153 // Potential tail calls could cause overwriting of argument stack slots.
4154 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4155 (CallConv == CallingConv::Fast));
4156 const Align PtrAlign(4);
4157
4158 // Assign locations to all of the incoming arguments.
4159 SmallVector<CCValAssign, 16> ArgLocs;
4160 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4161 *DAG.getContext());
4162
4163 // Reserve space for the linkage area on the stack.
4164 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4165 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4166 if (useSoftFloat())
4167 CCInfo.PreAnalyzeFormalArguments(Ins);
4168
4169 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4170 CCInfo.clearWasPPCF128();
4171
4172 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4173 CCValAssign &VA = ArgLocs[i];
4174
4175 // Arguments stored in registers.
4176 if (VA.isRegLoc()) {
4177 const TargetRegisterClass *RC;
4178 EVT ValVT = VA.getValVT();
4179
4180 switch (ValVT.getSimpleVT().SimpleTy) {
4181 default:
4182 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4183 case MVT::i1:
4184 case MVT::i32:
4185 RC = &PPC::GPRCRegClass;
4186 break;
4187 case MVT::f32:
4188 if (Subtarget.hasP8Vector())
4189 RC = &PPC::VSSRCRegClass;
4190 else if (Subtarget.hasSPE())
4191 RC = &PPC::GPRCRegClass;
4192 else
4193 RC = &PPC::F4RCRegClass;
4194 break;
4195 case MVT::f64:
4196 if (Subtarget.hasVSX())
4197 RC = &PPC::VSFRCRegClass;
4198 else if (Subtarget.hasSPE())
4199 // SPE passes doubles in GPR pairs.
4200 RC = &PPC::GPRCRegClass;
4201 else
4202 RC = &PPC::F8RCRegClass;
4203 break;
4204 case MVT::v16i8:
4205 case MVT::v8i16:
4206 case MVT::v4i32:
4207 RC = &PPC::VRRCRegClass;
4208 break;
4209 case MVT::v4f32:
4210 RC = &PPC::VRRCRegClass;
4211 break;
4212 case MVT::v2f64:
4213 case MVT::v2i64:
4214 RC = &PPC::VRRCRegClass;
4215 break;
4216 }
4217
4218 SDValue ArgValue;
4219 // Transform the arguments stored in physical registers into
4220 // virtual ones.
4221 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4222 assert(i + 1 < e && "No second half of double precision argument");
4223 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4224 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4225 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4226 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4227 if (!Subtarget.isLittleEndian())
4228 std::swap (ArgValueLo, ArgValueHi);
4229 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4230 ArgValueHi);
4231 } else {
4232 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4233 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4234 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4235 if (ValVT == MVT::i1)
4236 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4237 }
4238
4239 InVals.push_back(ArgValue);
4240 } else {
4241 // Argument stored in memory.
4242 assert(VA.isMemLoc());
4243
4244 // Get the extended size of the argument type in stack
4245 unsigned ArgSize = VA.getLocVT().getStoreSize();
4246 // Get the actual size of the argument type
4247 unsigned ObjSize = VA.getValVT().getStoreSize();
4248 unsigned ArgOffset = VA.getLocMemOffset();
4249 // Stack objects in PPC32 are right justified.
4250 ArgOffset += ArgSize - ObjSize;
4251 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4252
4253 // Create load nodes to retrieve arguments from the stack.
4254 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4255 InVals.push_back(
4256 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4257 }
4258 }
4259
4260 // Assign locations to all of the incoming aggregate by value arguments.
4261 // Aggregates passed by value are stored in the local variable space of the
4262 // caller's stack frame, right above the parameter list area.
4263 SmallVector<CCValAssign, 16> ByValArgLocs;
4264 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4265 ByValArgLocs, *DAG.getContext());
4266
4267 // Reserve stack space for the allocations in CCInfo.
4268 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4269
4270 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4271
4272 // Area that is at least reserved in the caller of this function.
4273 unsigned MinReservedArea = CCByValInfo.getStackSize();
4274 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4275
4276 // Set the size that is at least reserved in caller of this function. Tail
4277 // call optimized function's reserved stack space needs to be aligned so that
4278 // taking the difference between two stack areas will result in an aligned
4279 // stack.
4280 MinReservedArea =
4281 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4282 FuncInfo->setMinReservedArea(MinReservedArea);
4283
4284 SmallVector<SDValue, 8> MemOps;
4285
4286 // If the function takes variable number of arguments, make a frame index for
4287 // the start of the first vararg value... for expansion of llvm.va_start.
4288 if (isVarArg) {
4289 static const MCPhysReg GPArgRegs[] = {
4290 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4291 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4292 };
4293 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4294
4295 static const MCPhysReg FPArgRegs[] = {
4296 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4297 PPC::F8
4298 };
4299 unsigned NumFPArgRegs = std::size(FPArgRegs);
4300
4301 if (useSoftFloat() || hasSPE())
4302 NumFPArgRegs = 0;
4303
4304 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4305 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4306
4307 // Make room for NumGPArgRegs and NumFPArgRegs.
4308 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4309 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4310
4311 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4312 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4313
4314 FuncInfo->setVarArgsFrameIndex(
4315 MFI.CreateStackObject(Depth, Align(8), false));
4316 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4317
4318 // The fixed integer arguments of a variadic function are stored to the
4319 // VarArgsFrameIndex on the stack so that they may be loaded by
4320 // dereferencing the result of va_next.
4321 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4322 // Get an existing live-in vreg, or add a new one.
4323 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4324 if (!VReg)
4325 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4326
4327 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4328 SDValue Store =
4329 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4330 MemOps.push_back(Store);
4331 // Increment the address by four for the next argument to store
4332 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4333 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4334 }
4335
4336 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4337 // is set.
4338 // The double arguments are stored to the VarArgsFrameIndex
4339 // on the stack.
4340 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4341 // Get an existing live-in vreg, or add a new one.
4342 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4343 if (!VReg)
4344 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4345
4346 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4347 SDValue Store =
4348 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4349 MemOps.push_back(Store);
4350 // Increment the address by eight for the next argument to store
4351 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4352 PtrVT);
4353 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4354 }
4355 }
4356
4357 if (!MemOps.empty())
4358 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4359
4360 return Chain;
4361}
4362
4363// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4364// value to MVT::i64 and then truncate to the correct register size.
4365SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4366 EVT ObjectVT, SelectionDAG &DAG,
4367 SDValue ArgVal,
4368 const SDLoc &dl) const {
4369 if (Flags.isSExt())
4370 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4371 DAG.getValueType(ObjectVT));
4372 else if (Flags.isZExt())
4373 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4374 DAG.getValueType(ObjectVT));
4375
4376 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4377}
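// For example, a signext i32 argument arriving in a 64-bit GPR becomes
// roughly:
//   t1: i64 = CopyFromReg X3
//   t2: i64 = AssertSext t1, ValueType:i32
//   t3: i32 = truncate t2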
4378
4379SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4380 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4381 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4382 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4383 // TODO: add description of PPC stack frame format, or at least some docs.
4384 //
4385 bool isELFv2ABI = Subtarget.isELFv2ABI();
4386 bool isLittleEndian = Subtarget.isLittleEndian();
4387 MachineFunction &MF = DAG.getMachineFunction();
4388 MachineFrameInfo &MFI = MF.getFrameInfo();
4389 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4390
4391 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4392 "fastcc not supported on varargs functions");
4393
4394 EVT PtrVT = getPointerTy(MF.getDataLayout());
4395 // Potential tail calls could cause overwriting of argument stack slots.
4396 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4397 (CallConv == CallingConv::Fast));
4398 unsigned PtrByteSize = 8;
4399 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4400
4401 static const MCPhysReg GPR[] = {
4402 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4403 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4404 };
4405 static const MCPhysReg VR[] = {
4406 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4407 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4408 };
4409
4410 const unsigned Num_GPR_Regs = std::size(GPR);
4411 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4412 const unsigned Num_VR_Regs = std::size(VR);
4413
4414 // Do a first pass over the arguments to determine whether the ABI
4415 // guarantees that our caller has allocated the parameter save area
4416 // on its stack frame. In the ELFv1 ABI, this is always the case;
4417 // in the ELFv2 ABI, it is true if this is a vararg function or if
4418 // any parameter is located in a stack slot.
4419
4420 bool HasParameterArea = !isELFv2ABI || isVarArg;
4421 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4422 unsigned NumBytes = LinkageSize;
4423 unsigned AvailableFPRs = Num_FPR_Regs;
4424 unsigned AvailableVRs = Num_VR_Regs;
4425 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4426 if (Ins[i].Flags.isNest())
4427 continue;
4428
4429 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4430 PtrByteSize, LinkageSize, ParamAreaSize,
4431 NumBytes, AvailableFPRs, AvailableVRs))
4432 HasParameterArea = true;
4433 }
4434
4435 // Add DAG nodes to load the arguments or copy them out of registers. On
4436 // entry to a function on PPC, the arguments start after the linkage area,
4437 // although the first ones are often in registers.
4438
4439 unsigned ArgOffset = LinkageSize;
4440 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4441 SmallVector<SDValue, 8> MemOps;
4442 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4443 unsigned CurArgIdx = 0;
4444 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4445 SDValue ArgVal;
4446 bool needsLoad = false;
4447 EVT ObjectVT = Ins[ArgNo].VT;
4448 EVT OrigVT = Ins[ArgNo].ArgVT;
4449 unsigned ObjSize = ObjectVT.getStoreSize();
4450 unsigned ArgSize = ObjSize;
4451 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4452 if (Ins[ArgNo].isOrigArg()) {
4453 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4454 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4455 }
4456 // We re-align the argument offset for each argument, except under the fast
4457 // calling convention, where we re-align only when the argument will actually
4458 // use a stack slot.
4459 unsigned CurArgOffset;
4460 Align Alignment;
4461 auto ComputeArgOffset = [&]() {
4462 /* Respect alignment of argument on the stack. */
4463 Alignment =
4464 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4465 ArgOffset = alignTo(ArgOffset, Alignment);
4466 CurArgOffset = ArgOffset;
4467 };
4468
4469 if (CallConv != CallingConv::Fast) {
4470 ComputeArgOffset();
4471
4472 /* Compute GPR index associated with argument offset. */
4473 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4474 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4475 }
4476
4477 // FIXME the codegen can be much improved in some cases.
4478 // We do not have to keep everything in memory.
4479 if (Flags.isByVal()) {
4480 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4481
4482 if (CallConv == CallingConv::Fast)
4483 ComputeArgOffset();
4484
4485 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4486 ObjSize = Flags.getByValSize();
4487 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4488 // Empty aggregate parameters do not take up registers. Examples:
4489 // struct { } a;
4490 // union { } b;
4491 // int c[0];
4492 // etc. However, we have to provide a place-holder in InVals, so
4493 // pretend we have an 8-byte item at the current address for that
4494 // purpose.
4495 if (!ObjSize) {
4496 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4497 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4498 InVals.push_back(FIN);
4499 continue;
4500 }
4501
4502 // Create a stack object covering all stack doublewords occupied
4503 // by the argument. If the argument is (fully or partially) on
4504 // the stack, or if the argument is fully in registers but the
4505 // caller has allocated the parameter save anyway, we can refer
4506 // directly to the caller's stack frame. Otherwise, create a
4507 // local copy in our own frame.
4508 int FI;
4509 if (HasParameterArea ||
4510 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4511 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4512 else
4513 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4514 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4515
4516 // Handle aggregates smaller than 8 bytes.
4517 if (ObjSize < PtrByteSize) {
4518 // The value of the object is its address, which differs from the
4519 // address of the enclosing doubleword on big-endian systems.
4520 SDValue Arg = FIN;
4521 if (!isLittleEndian) {
4522 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4523 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4524 }
4525 InVals.push_back(Arg);
4526
4527 if (GPR_idx != Num_GPR_Regs) {
4528 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4529 FuncInfo->addLiveInAttr(VReg, Flags);
4530 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4531 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4532 SDValue Store =
4533 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4534 MachinePointerInfo(&*FuncArg), ObjType);
4535 MemOps.push_back(Store);
4536 }
4537 // Whether we copied from a register or not, advance the offset
4538 // into the parameter save area by a full doubleword.
4539 ArgOffset += PtrByteSize;
4540 continue;
4541 }
4542
4543 // The value of the object is its address, which is the address of
4544 // its first stack doubleword.
4545 InVals.push_back(FIN);
4546
4547 // Store whatever pieces of the object are in registers to memory.
4548 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4549 if (GPR_idx == Num_GPR_Regs)
4550 break;
4551
4552 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4553 FuncInfo->addLiveInAttr(VReg, Flags);
4554 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4555 SDValue Addr = FIN;
4556 if (j) {
4557 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4558 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4559 }
4560 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4561 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4562 SDValue Store =
4563 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4564 MachinePointerInfo(&*FuncArg, j), ObjType);
4565 MemOps.push_back(Store);
4566 ++GPR_idx;
4567 }
4568 ArgOffset += ArgSize;
4569 continue;
4570 }
4571
4572 switch (ObjectVT.getSimpleVT().SimpleTy) {
4573 default: llvm_unreachable("Unhandled argument type!");
4574 case MVT::i1:
4575 case MVT::i32:
4576 case MVT::i64:
4577 if (Flags.isNest()) {
4578 // The 'nest' parameter, if any, is passed in R11.
4579 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4580 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4581
4582 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4583 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4584
4585 break;
4586 }
4587
4588 // These can be scalar arguments or elements of an integer array type
4589 // passed directly. Clang may use those instead of "byval" aggregate
4590 // types to avoid forcing arguments to memory unnecessarily.
4591 if (GPR_idx != Num_GPR_Regs) {
4592 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4593 FuncInfo->addLiveInAttr(VReg, Flags);
4594 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4595
4596 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4597 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4598 // value to MVT::i64 and then truncate to the correct register size.
4599 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4600 } else {
4601 if (CallConv == CallingConv::Fast)
4602 ComputeArgOffset();
4603
4604 needsLoad = true;
4605 ArgSize = PtrByteSize;
4606 }
4607 if (CallConv != CallingConv::Fast || needsLoad)
4608 ArgOffset += 8;
4609 break;
4610
4611 case MVT::f32:
4612 case MVT::f64:
4613 // These can be scalar arguments or elements of a float array type
4614 // passed directly. The latter are used to implement ELFv2 homogenous
4615 // float aggregates.
4616 if (FPR_idx != Num_FPR_Regs) {
4617 unsigned VReg;
4618
4619 if (ObjectVT == MVT::f32)
4620 VReg = MF.addLiveIn(FPR[FPR_idx],
4621 Subtarget.hasP8Vector()
4622 ? &PPC::VSSRCRegClass
4623 : &PPC::F4RCRegClass);
4624 else
4625 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4626 ? &PPC::VSFRCRegClass
4627 : &PPC::F8RCRegClass);
4628
4629 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4630 ++FPR_idx;
4631 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4632 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4633 // once we support fp <-> gpr moves.
4634
4635 // This can only ever happen in the presence of f32 array types,
4636 // since otherwise we never run out of FPRs before running out
4637 // of GPRs.
4638 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4639 FuncInfo->addLiveInAttr(VReg, Flags);
4640 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4641
4642 if (ObjectVT == MVT::f32) {
4643 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4644 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4645 DAG.getConstant(32, dl, MVT::i32));
4646 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4647 }
4648
4649 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4650 } else {
4651 if (CallConv == CallingConv::Fast)
4652 ComputeArgOffset();
4653
4654 needsLoad = true;
4655 }
4656
4657 // When passing an array of floats, the array occupies consecutive
4658 // space in the argument area; only round up to the next doubleword
4659 // at the end of the array. Otherwise, each float takes 8 bytes.
4660 if (CallConv != CallingConv::Fast || needsLoad) {
4661 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4662 ArgOffset += ArgSize;
4663 if (Flags.isInConsecutiveRegsLast())
4664 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4665 }
4666 break;
4667 case MVT::v4f32:
4668 case MVT::v4i32:
4669 case MVT::v8i16:
4670 case MVT::v16i8:
4671 case MVT::v2f64:
4672 case MVT::v2i64:
4673 case MVT::v1i128:
4674 case MVT::f128:
4675 // These can be scalar arguments or elements of a vector array type
4676 // passed directly. The latter are used to implement ELFv2 homogenous
4677 // vector aggregates.
4678 if (VR_idx != Num_VR_Regs) {
4679 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4680 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4681 ++VR_idx;
4682 } else {
4683 if (CallConv == CallingConv::Fast)
4684 ComputeArgOffset();
4685 needsLoad = true;
4686 }
4687 if (CallConv != CallingConv::Fast || needsLoad)
4688 ArgOffset += 16;
4689 break;
4690 }
4691
4692 // We need to load the argument to a virtual register if we determined
4693 // above that we ran out of physical registers of the appropriate type.
4694 if (needsLoad) {
4695 if (ObjSize < ArgSize && !isLittleEndian)
4696 CurArgOffset += ArgSize - ObjSize;
4697 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4698 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4699 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4700 }
4701
4702 InVals.push_back(ArgVal);
4703 }
4704
4705 // Area that is at least reserved in the caller of this function.
4706 unsigned MinReservedArea;
4707 if (HasParameterArea)
4708 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4709 else
4710 MinReservedArea = LinkageSize;
4711
4712 // Set the size that is at least reserved in caller of this function. Tail
4713 // call optimized functions' reserved stack space needs to be aligned so that
4714 // taking the difference between two stack areas will result in an aligned
4715 // stack.
4716 MinReservedArea =
4717 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4718 FuncInfo->setMinReservedArea(MinReservedArea);
4719
4720 // If the function takes variable number of arguments, make a frame index for
4721 // the start of the first vararg value... for expansion of llvm.va_start.
4722 // The ELFv2 ABI spec writes:
4723 // C programs that are intended to be *portable* across different compilers
4724 // and architectures must use the header file <stdarg.h> to deal with variable
4725 // argument lists.
4726 if (isVarArg && MFI.hasVAStart()) {
4727 int Depth = ArgOffset;
4728
4729 FuncInfo->setVarArgsFrameIndex(
4730 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4731 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4732
4733 // If this function is vararg, store any remaining integer argument regs
4734 // to their spots on the stack so that they may be loaded by dereferencing
4735 // the result of va_next.
4736 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4737 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4738 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4739 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4740 SDValue Store =
4741 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4742 MemOps.push_back(Store);
4743 // Increment the address by the pointer size for the next argument to store
4744 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4745 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4746 }
4747 }
4748
4749 if (!MemOps.empty())
4750 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4751
4752 return Chain;
4753}
4754
4755/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4756/// adjusted to accommodate the arguments for the tailcall.
4757static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4758 unsigned ParamSize) {
4759
4760 if (!isTailCall) return 0;
4762 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4763 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4763 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4764 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4765 // Remember only if the new adjustment is bigger.
4766 if (SPDiff < FI->getTailCallSPDelta())
4767 FI->setTailCallSPDelta(SPDiff);
4768
4769 return SPDiff;
4770}
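// For example, a caller that reserved 64 bytes tail-calling a callee whose
// parameters need 96 bytes yields SPDiff == -32: the stack must be grown
// by 32 bytes before the jump, and the most negative delta seen so far is
// what gets remembered.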
4771
4772static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4773
4774static bool callsShareTOCBase(const Function *Caller,
4775 const GlobalValue *CalleeGV,
4776 const TargetMachine &TM) {
4777 // It does not make sense to call callsShareTOCBase() with a caller that
4778 // is PC Relative since PC Relative callers do not have a TOC.
4779#ifndef NDEBUG
4780 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4781 assert(!STICaller->isUsingPCRelativeCalls() &&
4782 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4783#endif
4784
4785 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4786 // don't have enough information to determine if the caller and callee share
4787 // the same TOC base, so we have to pessimistically assume they don't for
4788 // correctness.
4789 if (!CalleeGV)
4790 return false;
4791
4792 // If the callee is preemptable, then the static linker will use a plt-stub
4793 // which saves the toc to the stack, and needs a nop after the call
4794 // instruction to convert to a toc-restore.
4795 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), CalleeGV))
4796 return false;
4797
4798 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4799 // We may need a TOC restore in the situation where the caller requires a
4800 // valid TOC but the callee is PC Relative and does not.
4801 const Function *F = dyn_cast<Function>(CalleeGV);
4802 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4803
4804 // If we have an Alias we can try to get the function from there.
4805 if (Alias) {
4806 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4807 F = dyn_cast<Function>(GlobalObj);
4808 }
4809
4810 // If we still have no valid function pointer we do not have enough
4811 // information to determine if the callee uses PC Relative calls so we must
4812 // assume that it does.
4813 if (!F)
4814 return false;
4815
4816 // If the callee uses PC Relative we cannot guarantee that the callee won't
4817 // clobber the TOC of the caller and so we must assume that the two
4818 // functions do not share a TOC base.
4819 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4820 if (STICallee->isUsingPCRelativeCalls())
4821 return false;
4822
4823 // If the GV is not a strong definition then we need to assume it can be
4824 // replaced by another function at link time. The function that replaces
4825 // it may not share the same TOC as the caller since the callee may be
4826 // replaced by a PC Relative version of the same function.
4827 if (!CalleeGV->isStrongDefinitionForLinker())
4828 return false;
4829
4830 // The medium and large code models are expected to provide a sufficiently
4831 // large TOC to provide all data addressing needs of a module with a
4832 // single TOC.
4833 if (CodeModel::Medium == TM.getCodeModel() ||
4834 CodeModel::Large == TM.getCodeModel())
4835 return true;
4836
4837 // Any explicitly-specified sections and section prefixes must also match.
4838 // Also, if we're using -ffunction-sections, then each function is always in
4839 // a different section (the same is true for COMDAT functions).
4840 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4841 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4842 return false;
4843 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4844 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4845 return false;
4846 }
4847
4848 return true;
4849}
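// For example, under the small code model with -ffunction-sections, two
// dso-local strong definitions in the same module still land in different
// sections, so the section check above makes callsShareTOCBase() return
// false conservatively.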
4850
4851static bool
4852 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4853 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4854 assert(Subtarget.is64BitELFABI());
4855
4856 const unsigned PtrByteSize = 8;
4857 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4858
4859 static const MCPhysReg GPR[] = {
4860 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4861 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4862 };
4863 static const MCPhysReg VR[] = {
4864 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4865 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4866 };
4867
4868 const unsigned NumGPRs = std::size(GPR);
4869 const unsigned NumFPRs = 13;
4870 const unsigned NumVRs = std::size(VR);
4871 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4872
4873 unsigned NumBytes = LinkageSize;
4874 unsigned AvailableFPRs = NumFPRs;
4875 unsigned AvailableVRs = NumVRs;
4876
4877 for (const ISD::OutputArg& Param : Outs) {
4878 if (Param.Flags.isNest()) continue;
4879
4880 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4881 LinkageSize, ParamAreaSize, NumBytes,
4882 AvailableFPRs, AvailableVRs))
4883 return true;
4884 }
4885 return false;
4886}
4887
4888static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4889 if (CB.arg_size() != CallerFn->arg_size())
4890 return false;
4891
4892 auto CalleeArgIter = CB.arg_begin();
4893 auto CalleeArgEnd = CB.arg_end();
4894 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4895
4896 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4897 const Value* CalleeArg = *CalleeArgIter;
4898 const Value* CallerArg = &(*CallerArgIter);
4899 if (CalleeArg == CallerArg)
4900 continue;
4901
4902 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4903 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4904 // }
4905 // 1st argument of callee is undef and has the same type as caller.
4906 if (CalleeArg->getType() == CallerArg->getType() &&
4907 isa<UndefValue>(CalleeArg))
4908 continue;
4909
4910 return false;
4911 }
4912
4913 return true;
4914}
4915
4916// Returns true if TCO is possible between the callers and callees
4917// calling conventions.
4918static bool
4919 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4920 CallingConv::ID CalleeCC) {
4921 // Tail calls are possible with fastcc and ccc.
4922 auto isTailCallableCC = [] (CallingConv::ID CC){
4923 return CC == CallingConv::C || CC == CallingConv::Fast;
4924 };
4925 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4926 return false;
4927
4928 // We can safely tail call both fastcc and ccc callees from a c calling
4929 // convention caller. If the caller is fastcc, we may have less stack space
4930 // than a non-fastcc caller with the same signature so disable tail-calls in
4931 // that case.
4932 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4933}
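// In table form:
//   caller ccc    -> callee ccc or fastcc : eligible
//   caller fastcc -> callee fastcc        : eligible
//   caller fastcc -> callee ccc           : rejected, since the fastcc
//                    caller may have a smaller frame for the same signature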
4934
4935bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4936 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4937 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4938 const SmallVectorImpl<ISD::OutputArg> &Outs,
4939 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4940 bool isCalleeExternalSymbol) const {
4941 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4942
4943 if (DisableSCO && !TailCallOpt) return false;
4944
4945 // Variadic argument functions are not supported.
4946 if (isVarArg) return false;
4947
4948 // Check that the calling conventions are compatible for tco.
4949 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4950 return false;
4951
4952 // A caller that contains any byval parameter is not supported.
4953 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4954 return false;
4955
4956 // A callee that contains any byval parameter is not supported either.
4957 // Note: This is a quick workaround, because in some cases, e.g.
4958 // caller's stack size > callee's stack size, we are still able to apply
4959 // sibling call optimization. For example, gcc is able to do SCO for caller1
4960 // in the following example, but not for caller2.
4961 // struct test {
4962 // long int a;
4963 // char ary[56];
4964 // } gTest;
4965 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4966 // b->a = v.a;
4967 // return 0;
4968 // }
4969 // void caller1(struct test a, struct test c, struct test *b) {
4970 // callee(gTest, b); }
4971 // void caller2(struct test *b) { callee(gTest, b); }
4972 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4973 return false;
4974
4975 // If callee and caller use different calling conventions, we cannot pass
4976 // parameters on stack since offsets for the parameter area may be different.
4977 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
4978 return false;
4979
4980 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4981 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4982 // callee potentially have different TOC bases then we cannot tail call since
4983 // we need to restore the TOC pointer after the call.
4984 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4985 // We cannot guarantee this for indirect calls or calls to external functions.
4986 // When PC-Relative addressing is used, the concept of the TOC is no longer
4987 // applicable so this check is not required.
4988 // Check first for indirect calls.
4989 if (!Subtarget.isUsingPCRelativeCalls() &&
4990 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
4991 return false;
4992
4993 // Check if we share the TOC base.
4994 if (!Subtarget.isUsingPCRelativeCalls() &&
4995 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
4996 return false;
4997
4998 // TCO allows altering callee ABI, so we don't have to check further.
4999 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5000 return true;
5001
5002 if (DisableSCO) return false;
5003
5004 // If the callee uses the same argument list as the caller, we can apply
5005 // SCO in this case. If not, we need to check whether the callee needs
5006 // stack for passing arguments.
5007 // PC Relative tail calls may not have a CallBase.
5008 // If there is no CallBase we cannot verify if we have the same argument
5009 // list so assume that we don't have the same argument list.
5010 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5011 needStackSlotPassParameters(Subtarget, Outs))
5012 return false;
5013 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5014 return false;
5015
5016 return true;
5017}
5018
5019/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5020/// for tail call optimization. Targets which want to do tail call
5021/// optimization should implement this function.
5022bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5023 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5024 CallingConv::ID CallerCC, bool isVarArg,
5025 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5026 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5027 return false;
5028
5029 // Variable argument functions are not supported.
5030 if (isVarArg)
5031 return false;
5032
5033 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5034 // Functions containing by val parameters are not supported.
5035 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5036 return false;
5037
5038 // Non-PIC/GOT tail calls are supported.
5039 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5040 return true;
5041
5042 // At the moment we can only do local tail calls (in same module, hidden
5043 // or protected) if we are generating PIC.
5044 if (CalleeGV)
5045 return CalleeGV->hasHiddenVisibility() ||
5046 CalleeGV->hasProtectedVisibility();
5047 }
5048
5049 return false;
5050}
5051
5052/// isCallCompatibleAddress - Return the immediate to use if the specified
5053/// 32-bit value is representable in the immediate field of a BxA instruction.
5054 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5055 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5056 if (!C) return nullptr;
5057
5058 int Addr = C->getZExtValue();
5059 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5060 SignExtend32<26>(Addr) != Addr)
5061 return nullptr; // Top 6 bits have to be sext of immediate.
5062
5063 return DAG
5064 .getConstant(
5065 (int)C->getZExtValue() >> 2, SDLoc(Op),
5066 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5067 .getNode();
5068}
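// Worked example: Op == 0x1000 has its low two bits clear and sign-extends
// from 26 bits, so the returned immediate is 0x1000 >> 2 == 0x400;
// Op == 0x2000001 fails both checks and yields nullptr.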
5069
5070namespace {
5071
5072struct TailCallArgumentInfo {
5073 SDValue Arg;
5074 SDValue FrameIdxOp;
5075 int FrameIdx = 0;
5076
5077 TailCallArgumentInfo() = default;
5078};
5079
5080} // end anonymous namespace
5081
5082/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5083 static void StoreTailCallArgumentsToStackSlot(
5084 SelectionDAG &DAG, SDValue Chain,
5085 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5086 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5087 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5088 SDValue Arg = TailCallArgs[i].Arg;
5089 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5090 int FI = TailCallArgs[i].FrameIdx;
5091 // Store relative to framepointer.
5092 MemOpChains.push_back(DAG.getStore(
5093 Chain, dl, Arg, FIN,
5094 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5095 }
5096}
5097
5098/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5099/// the appropriate stack slot for the tail call optimized function call.
5100 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5101 SDValue OldRetAddr, SDValue OldFP,
5102 int SPDiff, const SDLoc &dl) {
5103 if (SPDiff) {
5104 // Calculate the new stack slot for the return address.
5105 MachineFunction &MF = DAG.getMachineFunction();
5106 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5107 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5108 bool isPPC64 = Subtarget.isPPC64();
5109 int SlotSize = isPPC64 ? 8 : 4;
5110 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5111 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5112 NewRetAddrLoc, true);
5113 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5114 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5115 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5116 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5117 }
5118 return Chain;
5119}
5120
5121/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5122/// the position of the argument.
5123static void
5124 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5125 SDValue Arg, int SPDiff, unsigned ArgOffset,
5126 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5127 int Offset = ArgOffset + SPDiff;
5128 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5129 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5130 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5131 SDValue FIN = DAG.getFrameIndex(FI, VT);
5132 TailCallArgumentInfo Info;
5133 Info.Arg = Arg;
5134 Info.FrameIdxOp = FIN;
5135 Info.FrameIdx = FI;
5136 TailCallArguments.push_back(Info);
5137}
5138
5139/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5140/// stack slot. Returns the chain as result and the loaded frame pointers in
5141 /// LROpOut/FPOpOut. Used when tail calling.
5142SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5143 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5144 SDValue &FPOpOut, const SDLoc &dl) const {
5145 if (SPDiff) {
5146 // Load the LR and FP stack slot for later adjusting.
5147 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5148 LROpOut = getReturnAddrFrameIndex(DAG);
5149 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5150 Chain = SDValue(LROpOut.getNode(), 1);
5151 }
5152 return Chain;
5153}
5154
5155/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5156/// by "Src" to address "Dst" of size "Size". Alignment information is
5157/// specified by the specific parameter attribute. The copy will be passed as
5158/// a byval function parameter.
5159/// Sometimes what we are copying is the end of a larger object, the part that
5160/// does not fit in registers.
5161 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5162 SDValue Chain, ISD::ArgFlagsTy Flags,
5163 SelectionDAG &DAG, const SDLoc &dl) {
5164 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5165 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5166 Flags.getNonZeroByValAlign(), false, false, false,
5167 MachinePointerInfo(), MachinePointerInfo());
5168}
5169
5170/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5171/// tail calls.
5172 static void LowerMemOpCallTo(
5173 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5174 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5175 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5176 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5177 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5178 if (!isTailCall) {
5179 if (isVector) {
5180 SDValue StackPtr;
5181 if (isPPC64)
5182 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5183 else
5184 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5185 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5186 DAG.getConstant(ArgOffset, dl, PtrVT));
5187 }
5188 MemOpChains.push_back(
5189 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5190 // Calculate and remember argument location.
5191 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5192 TailCallArguments);
5193}
5194
5195static void
5196PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5197 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5198 SDValue FPOp,
5199 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5200 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5201 // might overwrite each other in case of tail call optimization.
5202 SmallVector<SDValue, 8> MemOpChains2;
5203 // Do not flag preceding copytoreg stuff together with the following stuff.
5204 InGlue = SDValue();
5205 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5206 MemOpChains2, dl);
5207 if (!MemOpChains2.empty())
5208 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5209
5210 // Store the return address to the appropriate stack slot.
5211 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5212
5213 // Emit callseq_end just before tailcall node.
5214 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5215 InGlue = Chain.getValue(1);
5216}
5217
5218// Is this global address that of a function that can be called by name? (as
5219// opposed to something that must hold a descriptor for an indirect call).
5220static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5221 if (GV) {
5222 if (GV->isThreadLocal())
5223 return false;
5224
5225 return GV->getValueType()->isFunctionTy();
5226 }
5227
5228 return false;
5229}
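// [Annotation, not in the upstream source] For example, a direct call such as
// 'call void @foo()' refers to a global whose value type is a FunctionType, so
// the check above returns true; a thread-local global, or a global variable
// that merely holds a function pointer, returns false and is lowered as an
// indirect call.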
5230
5231SDValue PPCTargetLowering::LowerCallResult(
5232 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5233 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5234 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5235 SmallVector<CCValAssign, 16> RVLocs;
5236 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5237 *DAG.getContext());
5238
5239 CCRetInfo.AnalyzeCallResult(
5240 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5241 ? RetCC_PPC_Cold
5242 : RetCC_PPC);
5243
5244 // Copy all of the result registers out of their specified physreg.
5245 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5246 CCValAssign &VA = RVLocs[i];
5247 assert(VA.isRegLoc() && "Can only return in registers!");
5248
5249 SDValue Val;
5250
5251 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5252 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5253 InGlue);
5254 Chain = Lo.getValue(1);
5255 InGlue = Lo.getValue(2);
5256 VA = RVLocs[++i]; // skip ahead to next loc
5257 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5258 InGlue);
5259 Chain = Hi.getValue(1);
5260 InGlue = Hi.getValue(2);
5261 if (!Subtarget.isLittleEndian())
5262 std::swap (Lo, Hi);
5263 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5264 } else {
5265 Val = DAG.getCopyFromReg(Chain, dl,
5266 VA.getLocReg(), VA.getLocVT(), InGlue);
5267 Chain = Val.getValue(1);
5268 InGlue = Val.getValue(2);
5269 }
5270
5271 switch (VA.getLocInfo()) {
5272 default: llvm_unreachable("Unknown loc info!");
5273 case CCValAssign::Full: break;
5274 case CCValAssign::AExt:
5275 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5276 break;
5277 case CCValAssign::ZExt:
5278 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5279 DAG.getValueType(VA.getValVT()));
5280 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5281 break;
5282 case CCValAssign::SExt:
5283 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5284 DAG.getValueType(VA.getValVT()));
5285 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5286 break;
5287 }
5288
5289 InVals.push_back(Val);
5290 }
5291
5292 return Chain;
5293}
5294
5295static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5296 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5297 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5298 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5299
5300 // PatchPoint calls are not indirect.
5301 if (isPatchPoint)
5302 return false;
5303
5304 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5305 return false;
5306
5307 // Darwin and 32-bit ELF can use a BLA. The descriptor based ABIs cannot
5308 // because the immediate function pointer points to a descriptor instead of
5309 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5310 // pointer immediate points to the global entry point, while the BLA would
5311 // need to jump to the local entry point (see rL211174).
5312 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5313 isBLACompatibleAddress(Callee, DAG))
5314 return false;
5315
5316 return true;
5317}
5318
5319// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5320static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5321 return Subtarget.isAIXABI() ||
5322 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5323}
5324
5325static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5326 const Function &Caller, const SDValue &Callee,
5327 const PPCSubtarget &Subtarget,
5328 const TargetMachine &TM,
5329 bool IsStrictFPCall = false) {
5330 if (CFlags.IsTailCall)
5331 return PPCISD::TC_RETURN;
5332
5333 unsigned RetOpc = 0;
5334 // This is a call through a function pointer.
5335 if (CFlags.IsIndirect) {
5336 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5337 // indirect calls. The save of the caller's TOC pointer to the stack will be
5338 // inserted into the DAG as part of call lowering. The restore of the TOC
5339 // pointer is modeled by using a pseudo instruction for the call opcode that
5340 // represents the 2 instruction sequence of an indirect branch and link,
5341 // immediately followed by a load of the TOC pointer from the stack save
5342 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5343 // as it is not saved or used.
5344 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5345 : PPCISD::BCTRL;
5346 } else if (Subtarget.isUsingPCRelativeCalls()) {
5347 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5348 RetOpc = PPCISD::CALL_NOTOC;
5349 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5350 // The ABIs that maintain a TOC pointer across calls need to have a nop
5351 // immediately following the call instruction if the caller and callee may
5352 // have different TOC bases. At link time if the linker determines the calls
5353 // may not share a TOC base, the call is redirected to a trampoline inserted
5354 // by the linker. The trampoline will (among other things) save the caller's
5355 // TOC pointer at an ABI designated offset in the linkage area and the
5356 // linker will rewrite the nop to be a load of the TOC pointer from the
5357 // linkage area into gpr2.
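// [Annotation, not in the upstream source] Illustrative emitted sequence on
// 64-bit ELFv2 when the caller and callee may not share a TOC base:
//   bl callee
//   nop        # the linker may rewrite this to: ld r2, 24(r1)
// (24 is the ELFv2 TOC save slot in the linkage area; ELFv1 uses offset 40.)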
5358 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5359 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5360 RetOpc =
5361 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5362 } else
5363 RetOpc = PPCISD::CALL;
5364 if (IsStrictFPCall) {
5365 switch (RetOpc) {
5366 default:
5367 llvm_unreachable("Unknown call opcode");
5368 case PPCISD::BCTRL_LOAD_TOC:
5369 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5370 break;
5371 case PPCISD::BCTRL:
5372 RetOpc = PPCISD::BCTRL_RM;
5373 break;
5374 case PPCISD::CALL_NOTOC:
5375 RetOpc = PPCISD::CALL_NOTOC_RM;
5376 break;
5377 case PPCISD::CALL:
5378 RetOpc = PPCISD::CALL_RM;
5379 break;
5380 case PPCISD::CALL_NOP:
5381 RetOpc = PPCISD::CALL_NOP_RM;
5382 break;
5383 }
5384 }
5385 return RetOpc;
5386}
5387
5388static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5389 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5390 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5391 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5392 return SDValue(Dest, 0);
5393
5394 // Returns true if the callee is local, and false otherwise.
5395 auto isLocalCallee = [&]() {
5396 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5397 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5398 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5399
5400 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5401 !isa_and_nonnull<GlobalIFunc>(GV);
5402 };
5403
5404 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5405 // a static relocation model causes some versions of GNU LD (2.17.50, at
5406 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5407 // built with secure-PLT.
5408 bool UsePlt =
5409 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5410 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5411
5412 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5413 const TargetMachine &TM = Subtarget.getTargetMachine();
5414 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5415 MCSymbolXCOFF *S =
5416 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5417
5418 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5419 return DAG.getMCSymbol(S, PtrVT);
5420 };
5421
5422 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5423 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5424 if (isFunctionGlobalAddress(GV)) {
5425 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5426
5427 if (Subtarget.isAIXABI()) {
5428 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5429 return getAIXFuncEntryPointSymbolSDNode(GV);
5430 }
5431 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5432 UsePlt ? PPCII::MO_PLT : 0);
5433 }
5434
5435 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5436 const char *SymName = S->getSymbol();
5437 if (Subtarget.isAIXABI()) {
5438 // If there exists a user-declared function whose name is the same as the
5439 // ExternalSymbol's, then we pick up the user-declared version.
5440 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5441 if (const Function *F =
5442 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5443 return getAIXFuncEntryPointSymbolSDNode(F);
5444
5445 // On AIX, direct function calls reference the symbol for the function's
5446 // entry point, which is named by prepending a "." before the function's
5447 // C-linkage name. A Qualname is returned here because an external
5448 // function entry point is a csect with XTY_ER property.
5449 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5450 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5451 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5452 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5454 return Sec->getQualNameSymbol();
5455 };
5456
5457 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5458 }
5459 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5460 UsePlt ? PPCII::MO_PLT : 0);
5461 }
5462
5463 // No transformation needed.
5464 assert(Callee.getNode() && "What no callee?");
5465 return Callee;
5466}
5467
5468static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5469 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5470 "Expected a CALLSEQ_STARTSDNode.");
5471
5472 // The last result value is the chain, except when the node has glue. If the
5473 // node has glue, then the last value is the glue and the chain is the
5474 // second-to-last value.
5475 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5476 if (LastValue.getValueType() != MVT::Glue)
5477 return LastValue;
5478
5479 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5480}
5481
5482// Creates the node that moves a function's address into the count register
5483// to prepare for an indirect call instruction.
5484static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5485 SDValue &Glue, SDValue &Chain,
5486 const SDLoc &dl) {
5487 SDValue MTCTROps[] = {Chain, Callee, Glue};
5488 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5489 Chain = DAG.getNode(PPCISD::MTCTR, dl, ArrayRef(ReturnTypes, 2),
5490 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5491 // The glue is the second value produced.
5492 Glue = Chain.getValue(1);
5493}
5494
5496 SDValue &Glue, SDValue &Chain,
5497 SDValue CallSeqStart,
5498 const CallBase *CB, const SDLoc &dl,
5499 bool hasNest,
5500 const PPCSubtarget &Subtarget) {
5501 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5502 // entry point, but to the function descriptor (the function entry point
5503 // address is part of the function descriptor though).
5504 // The function descriptor is a three doubleword structure with the
5505 // following fields: function entry point, TOC base address and
5506 // environment pointer.
5507 // Thus for a call through a function pointer, the following actions need
5508 // to be performed:
5509 // 1. Save the TOC of the caller in the TOC save area of its stack
5510 // frame (this is done in LowerCall_64SVR4()).
5511 // 2. Load the address of the function entry point from the function
5512 // descriptor.
5513 // 3. Load the TOC of the callee from the function descriptor into r2.
5514 // 4. Load the environment pointer from the function descriptor into
5515 // r11.
5516 // 5. Branch to the function entry point address.
5517 // 6. On return of the callee, the TOC of the caller needs to be
5518 // restored (this is done in FinishCall()).
5519 //
5520 // The loads are scheduled at the beginning of the call sequence, and the
5521 // register copies are flagged together to ensure that no other
5522 // operations can be scheduled in between. E.g. without flagging the
5523 // copies together, a TOC access in the caller could be scheduled between
5524 // the assignment of the callee TOC and the branch to the callee, which leads
5525 // to incorrect code.
5526
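// [Annotation, not in the upstream source] Sketch of a 64-bit ELFv1 function
// descriptor as described above; the actual offsets are taken from the
// Subtarget queries below:
//   +0  : function entry point address
//   +8  : TOC base address (copied into r2)
//   +16 : environment pointer (loaded into r11)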
5527 // Start by loading the function address from the descriptor.
5528 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5529 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5530 ? (MachineMemOperand::MODereferenceable |
5531 MachineMemOperand::MOInvariant)
5532 : MachineMemOperand::MONone;
5533
5534 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5535
5536 // Registers used in building the DAG.
5537 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5538 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5539
5540 // Offsets of descriptor members.
5541 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5542 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5543
5544 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5545 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5546
5547 // One load for the function's entry point address.
5548 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5549 Alignment, MMOFlags);
5550
5551 // One for loading the TOC anchor for the module that contains the called
5552 // function.
5553 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5554 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5555 SDValue TOCPtr =
5556 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5557 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5558
5559 // One for loading the environment pointer.
5560 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5561 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5562 SDValue LoadEnvPtr =
5563 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5564 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5565
5566
5567 // Then copy the newly loaded TOC anchor to the TOC pointer.
5568 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5569 Chain = TOCVal.getValue(0);
5570 Glue = TOCVal.getValue(1);
5571
5572 // If the function call has an explicit 'nest' parameter, it takes the
5573 // place of the environment pointer.
5574 assert((!hasNest || !Subtarget.isAIXABI()) &&
5575 "Nest parameter is not supported on AIX.");
5576 if (!hasNest) {
5577 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5578 Chain = EnvVal.getValue(0);
5579 Glue = EnvVal.getValue(1);
5580 }
5581
5582 // The rest of the indirect call sequence is the same as the non-descriptor
5583 // DAG.
5584 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5585}
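// [Annotation, not in the upstream source] The DAG built above corresponds
// roughly to the following 64-bit ELFv1 sequence, assuming for illustration
// that the descriptor address is in r3:
//   ld r12, 0(r3)    # function entry point
//   ld r2,  8(r3)    # callee TOC anchor
//   ld r11, 16(r3)   # environment pointer
//   mtctr r12
//   bctrl
//   ld r2, 40(r1)    # caller TOC restored from the linkage area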
5586
5587static void
5588buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5589 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5590 SelectionDAG &DAG,
5591 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5592 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5593 const PPCSubtarget &Subtarget) {
5594 const bool IsPPC64 = Subtarget.isPPC64();
5595 // MVT for a general purpose register.
5596 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5597
5598 // First operand is always the chain.
5599 Ops.push_back(Chain);
5600
5601 // If it's a direct call pass the callee as the second operand.
5602 if (!CFlags.IsIndirect)
5603 Ops.push_back(Callee);
5604 else {
5605 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5606
5607 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5608 // on the stack (this would have been done in `LowerCall_64SVR4` or
5609 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5610 // represents both the indirect branch and a load that restores the TOC
5611 // pointer from the linkage area. The operand for the TOC restore is an add
5612 // of the TOC save offset to the stack pointer. This must be the second
5613 // operand: after the chain input but before any other variadic arguments.
5614 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5615 // saved or used.
5616 if (isTOCSaveRestoreRequired(Subtarget)) {
5617 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5618
5619 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5620 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5621 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5622 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5623 Ops.push_back(AddTOC);
5624 }
5625
5626 // Add the register used for the environment pointer.
5627 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5628 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5629 RegVT));
5630
5631
5632 // Add CTR register as callee so a bctr can be emitted later.
5633 if (CFlags.IsTailCall)
5634 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5635 }
5636
5637 // If this is a tail call add stack pointer delta.
5638 if (CFlags.IsTailCall)
5639 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5640
5641 // Add argument registers to the end of the list so that they are known live
5642 // into the call.
5643 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5644 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5645 RegsToPass[i].second.getValueType()));
5646
5647 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5648 // no way to mark dependencies as implicit here.
5649 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5650 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5651 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5652 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5653
5654 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5655 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5656 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5657
5658 // Add a register mask operand representing the call-preserved registers.
5659 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5660 const uint32_t *Mask =
5661 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5662 assert(Mask && "Missing call preserved mask for calling convention");
5663 Ops.push_back(DAG.getRegisterMask(Mask));
5664
5665 // If the glue is valid, it is the last operand.
5666 if (Glue.getNode())
5667 Ops.push_back(Glue);
5668}
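// [Annotation, not in the upstream source] For an indirect, non-tail call on
// a TOC-based ABI, the operand list built above is, in order: Chain, the
// TOC-restore address (SP + TOC save offset), the environment-pointer
// register (descriptor ABIs without 'nest' only), the argument registers, the
// TOC pointer register, CR1EQ (32-bit SVR4 varargs only), the register mask,
// and finally the Glue when present.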
5669
5670SDValue PPCTargetLowering::FinishCall(
5671 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5672 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5673 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5674 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5675 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5676
5677 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5678 Subtarget.isAIXABI())
5679 setUsesTOCBasePtr(DAG);
5680
5681 unsigned CallOpc =
5682 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5683 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5684
5685 if (!CFlags.IsIndirect)
5686 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5687 else if (Subtarget.usesFunctionDescriptors())
5688 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5689 dl, CFlags.HasNest, Subtarget);
5690 else
5691 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5692
5693 // Build the operand list for the call instruction.
5694 SmallVector<SDValue, 8> Ops;
5695 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5696 SPDiff, Subtarget);
5697
5698 // Emit tail call.
5699 if (CFlags.IsTailCall) {
5700 // Indirect tail calls when using PC Relative calls do not have the same
5701 // constraints.
5702 assert(((Callee.getOpcode() == ISD::Register &&
5703 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5704 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5705 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5706 isa<ConstantSDNode>(Callee) ||
5707 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5708 "Expecting a global address, external symbol, absolute value, "
5709 "register or an indirect tail call when PC Relative calls are "
5710 "used.");
5711 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5712 assert(CallOpc == PPCISD::TC_RETURN &&
5713 "Unexpected call opcode for a tail call.");
5715 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5716 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5717 return Ret;
5718 }
5719
5720 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5721 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5722 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5723 Glue = Chain.getValue(1);
5724
5725 // When performing tail call optimization the callee pops its arguments off
5726 // the stack. Account for this here so these bytes can be pushed back on in
5727 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5728 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5729 getTargetMachine().Options.GuaranteedTailCallOpt)
5730 ? NumBytes
5731 : 0;
5732
5733 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5734 Glue = Chain.getValue(1);
5735
5736 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5737 DAG, InVals);
5738}
5739
5740bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5741 CallingConv::ID CalleeCC = CB->getCallingConv();
5742 const Function *CallerFunc = CB->getCaller();
5743 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5744 const Function *CalleeFunc = CB->getCalledFunction();
5745 if (!CalleeFunc)
5746 return false;
5747 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5748
5749
5750 SmallVector<ISD::OutputArg, 2> Outs;
5751 SmallVector<ISD::InputArg, 2> Ins;
5752 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5753 CalleeFunc->getAttributes(), Outs, *this,
5754 CalleeFunc->getParent()->getDataLayout());
5755
5756 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5757 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5758 false /*isCalleeExternalSymbol*/);
5759}
5760
5761bool PPCTargetLowering::isEligibleForTCO(
5762 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5763 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5765 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5766 bool isCalleeExternalSymbol) const {
5767 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5768 return false;
5769
5770 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5771 return IsEligibleForTailCallOptimization_64SVR4(
5772 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5773 isCalleeExternalSymbol);
5774 else
5775 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5776 isVarArg, Ins);
5777}
5778
5779SDValue
5780PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5781 SmallVectorImpl<SDValue> &InVals) const {
5782 SelectionDAG &DAG = CLI.DAG;
5783 SDLoc &dl = CLI.DL;
5785 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5787 SDValue Chain = CLI.Chain;
5788 SDValue Callee = CLI.Callee;
5789 bool &isTailCall = CLI.IsTailCall;
5790 CallingConv::ID CallConv = CLI.CallConv;
5791 bool isVarArg = CLI.IsVarArg;
5792 bool isPatchPoint = CLI.IsPatchPoint;
5793 const CallBase *CB = CLI.CB;
5794
5795 if (isTailCall) {
5796 MachineFunction &MF = DAG.getMachineFunction();
5797 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5798 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5799 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5800 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5801
5802 isTailCall =
5803 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5804 &(MF.getFunction()), IsCalleeExternalSymbol);
5805 if (isTailCall) {
5806 ++NumTailCalls;
5807 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5808 ++NumSiblingCalls;
5809
5810 // PC Relative calls no longer guarantee that the callee is a Global
5811 // Address Node. The callee could be an indirect tail call in which
5812 // case the SDValue for the callee could be a load (to load the address
5813 // of a function pointer) or it may be a register copy (to move the
5814 // address of the callee from a function parameter into a virtual
5815 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5816 assert((Subtarget.isUsingPCRelativeCalls() ||
5817 isa<GlobalAddressSDNode>(Callee)) &&
5818 "Callee should be an llvm::Function object.");
5819
5820 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5821 << "\nTCO callee: ");
5822 LLVM_DEBUG(Callee.dump());
5823 }
5824 }
5825
5826 if (!isTailCall && CB && CB->isMustTailCall())
5827 report_fatal_error("failed to perform tail call elimination on a call "
5828 "site marked musttail");
5829
5830 // When long calls (i.e. indirect calls) are always used, calls are always
5831 // made via function pointer. If we have a function name, first translate it
5832 // into a pointer.
5833 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5834 !isTailCall)
5835 Callee = LowerGlobalAddress(Callee, DAG);
5836
5837 CallFlags CFlags(
5838 CallConv, isTailCall, isVarArg, isPatchPoint,
5839 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5840 // hasNest
5841 Subtarget.is64BitELFABI() &&
5842 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5843 CLI.NoMerge);
5844
5845 if (Subtarget.isAIXABI())
5846 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5847 InVals, CB);
5848
5849 assert(Subtarget.isSVR4ABI());
5850 if (Subtarget.isPPC64())
5851 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5852 InVals, CB);
5853 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5854 InVals, CB);
5855}
5856
5857SDValue PPCTargetLowering::LowerCall_32SVR4(
5858 SDValue Chain, SDValue Callee, CallFlags CFlags,
5860 const SmallVectorImpl<SDValue> &OutVals,
5861 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5862 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5863 const CallBase *CB) const {
5864 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5865 // of the 32-bit SVR4 ABI stack frame layout.
5866
5867 const CallingConv::ID CallConv = CFlags.CallConv;
5868 const bool IsVarArg = CFlags.IsVarArg;
5869 const bool IsTailCall = CFlags.IsTailCall;
5870
5871 assert((CallConv == CallingConv::C ||
5872 CallConv == CallingConv::Cold ||
5873 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5874
5875 const Align PtrAlign(4);
5876
5877 MachineFunction &MF = DAG.getMachineFunction();
5878
5879 // Mark this function as potentially containing a function that contains a
5880 // tail call. As a consequence, the frame pointer will be used for dynamic
5881 // allocation and for restoring the caller's stack pointer in this function's
5882 // epilogue. This is done because a tail-called function might overwrite the
5883 // value in this function's (MF) stack pointer stack slot 0(SP).
5884 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5885 CallConv == CallingConv::Fast)
5886 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5887
5888 // Count how many bytes are to be pushed on the stack, including the linkage
5889 // area, parameter list area and the part of the local variable space which
5890 // contains copies of aggregates which are passed by value.
5891
5892 // Assign locations to all of the outgoing arguments.
5893 SmallVector<CCValAssign, 16> ArgLocs;
5894 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5895
5896 // Reserve space for the linkage area on the stack.
5897 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5898 PtrAlign);
5899 if (useSoftFloat())
5900 CCInfo.PreAnalyzeCallOperands(Outs);
5901
5902 if (IsVarArg) {
5903 // Handle fixed and variable vector arguments differently.
5904 // Fixed vector arguments go into registers as long as registers are
5905 // available. Variable vector arguments always go into memory.
5906 unsigned NumArgs = Outs.size();
5907
5908 for (unsigned i = 0; i != NumArgs; ++i) {
5909 MVT ArgVT = Outs[i].VT;
5910 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5911 bool Result;
5912
5913 if (Outs[i].IsFixed) {
5914 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5915 CCInfo);
5916 } else {
5917 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5918 ArgFlags, CCInfo);
5919 }
5920
5921 if (Result) {
5922#ifndef NDEBUG
5923 errs() << "Call operand #" << i << " has unhandled type "
5924 << ArgVT << "\n";
5925#endif
5926 llvm_unreachable(nullptr);
5927 }
5928 }
5929 } else {
5930 // All arguments are treated the same.
5931 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5932 }
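// [Annotation, not in the upstream source] Example of the fixed/variadic
// split above: a fixed <4 x i32> argument may be assigned a vector register
// by CC_PPC32_SVR4, while the same vector passed through '...' is forced to
// memory by CC_PPC32_SVR4_VarArg.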
5933 CCInfo.clearWasPPCF128();
5934
5935 // Assign locations to all of the outgoing aggregate by value arguments.
5936 SmallVector<CCValAssign, 16> ByValArgLocs;
5937 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5938
5939 // Reserve stack space for the allocations in CCInfo.
5940 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5941
5942 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5943
5944 // Size of the linkage area, parameter list area and the part of the local
5945 // variable space where copies of aggregates which are passed by value are
5946 // stored.
5947 unsigned NumBytes = CCByValInfo.getStackSize();
5948
5949 // Calculate by how many bytes the stack has to be adjusted in case of tail
5950 // call optimization.
5951 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5952
5953 // Adjust the stack pointer for the new arguments...
5954 // These operations are automatically eliminated by the prolog/epilog pass
5955 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5956 SDValue CallSeqStart = Chain;
5957
5958 // Load the return address and frame pointer so they can be moved somewhere
5959 // else later.
5960 SDValue LROp, FPOp;
5961 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5962
5963 // Set up a copy of the stack pointer for use loading and storing any
5964 // arguments that may not fit in the registers available for argument
5965 // passing.
5966 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5967
5969 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5970 SmallVector<SDValue, 8> MemOpChains;
5971
5972 bool seenFloatArg = false;
5973 // Walk the register/memloc assignments, inserting copies/loads.
5974 // i - Tracks the index into the list of registers allocated for the call
5975 // RealArgIdx - Tracks the index into the list of actual function arguments
5976 // j - Tracks the index into the list of byval arguments
5977 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5978 i != e;
5979 ++i, ++RealArgIdx) {
5980 CCValAssign &VA = ArgLocs[i];
5981 SDValue Arg = OutVals[RealArgIdx];
5982 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5983
5984 if (Flags.isByVal()) {
5985 // Argument is an aggregate which is passed by value, thus we need to
5986 // create a copy of it in the local variable space of the current stack
5987 // frame (which is the stack frame of the caller) and pass the address of
5988 // this copy to the callee.
5989 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5990 CCValAssign &ByValVA = ByValArgLocs[j++];
5991 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5992
5993 // Memory reserved in the local variable space of the caller's stack frame.
5994 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5995
5996 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5997 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5998 StackPtr, PtrOff);
5999
6000 // Create a copy of the argument in the local area of the current
6001 // stack frame.
6002 SDValue MemcpyCall =
6003 CreateCopyOfByValArgument(Arg, PtrOff,
6004 CallSeqStart.getNode()->getOperand(0),
6005 Flags, DAG, dl);
6006
6007 // This must go outside the CALLSEQ_START..END.
6008 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6009 SDLoc(MemcpyCall));
6010 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6011 NewCallSeqStart.getNode());
6012 Chain = CallSeqStart = NewCallSeqStart;
6013
6014 // Pass the address of the aggregate copy on the stack either in a
6015 // physical register or in the parameter list area of the current stack
6016 // frame to the callee.
6017 Arg = PtrOff;
6018 }
6019
6020 // When useCRBits() is true, there can be i1 arguments.
6021 // It is because getRegisterType(MVT::i1) => MVT::i1,
6022 // and for other integer types getRegisterType() => MVT::i32.
6023 // Extend i1 and ensure callee will get i32.
6024 if (Arg.getValueType() == MVT::i1)
6025 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6026 dl, MVT::i32, Arg);
6027
6028 if (VA.isRegLoc()) {
6029 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6030 // Put argument in a physical register.
6031 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6032 bool IsLE = Subtarget.isLittleEndian();
6033 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6034 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6035 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6036 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6037 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6038 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6039 SVal.getValue(0)));
6040 } else
6041 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6042 } else {
6043 // Put argument in the parameter list area of the current stack frame.
6044 assert(VA.isMemLoc());
6045 unsigned LocMemOffset = VA.getLocMemOffset();
6046
6047 if (!IsTailCall) {
6048 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6049 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6050 StackPtr, PtrOff);
6051
6052 MemOpChains.push_back(
6053 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6054 } else {
6055 // Calculate and remember argument location.
6056 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6057 TailCallArguments);
6058 }
6059 }
6060 }
6061
6062 if (!MemOpChains.empty())
6063 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6064
6065 // Build a sequence of copy-to-reg nodes chained together with token chain
6066 // and flag operands which copy the outgoing args into the appropriate regs.
6067 SDValue InGlue;
6068 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6069 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6070 RegsToPass[i].second, InGlue);
6071 InGlue = Chain.getValue(1);
6072 }
6073
6074 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6075 // registers.
6076 if (IsVarArg) {
6077 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6078 SDValue Ops[] = { Chain, InGlue };
6079
6080 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6081 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6082
6083 InGlue = Chain.getValue(1);
6084 }
6085
6086 if (IsTailCall)
6087 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6088 TailCallArguments);
6089
6090 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6091 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6092}
6093
6094// Copy an argument into memory, being careful to do this outside the
6095// call sequence for the call to which the argument belongs.
6096SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6097 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6098 SelectionDAG &DAG, const SDLoc &dl) const {
6099 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6100 CallSeqStart.getNode()->getOperand(0),
6101 Flags, DAG, dl);
6102 // The MEMCPY must go outside the CALLSEQ_START..END.
6103 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6104 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6105 SDLoc(MemcpyCall));
6106 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6107 NewCallSeqStart.getNode());
6108 return NewCallSeqStart;
6109}
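// [Annotation, not in the upstream source] The memcpy is hoisted before
// CALLSEQ_START because it may itself be lowered to a libcall with its own
// call sequence, and call sequences must not nest; the CALLSEQ_START is
// therefore re-created on top of the memcpy's output chain.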
6110
6111SDValue PPCTargetLowering::LowerCall_64SVR4(
6112 SDValue Chain, SDValue Callee, CallFlags CFlags,
6113 const SmallVectorImpl<ISD::OutputArg> &Outs,
6114 const SmallVectorImpl<SDValue> &OutVals,
6115 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6116 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6117 const CallBase *CB) const {
6118 bool isELFv2ABI = Subtarget.isELFv2ABI();
6119 bool isLittleEndian = Subtarget.isLittleEndian();
6120 unsigned NumOps = Outs.size();
6121 bool IsSibCall = false;
6122 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6123
6124 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6125 unsigned PtrByteSize = 8;
6126
6127 MachineFunction &MF = DAG.getMachineFunction();
6128
6129 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6130 IsSibCall = true;
6131
6132 // Mark this function as potentially containing a function that contains a
6133 // tail call. As a consequence, the frame pointer will be used for dynamic
6134 // allocation and for restoring the caller's stack pointer in this function's
6135 // epilogue. This is done because a tail-called function might overwrite the
6136 // value in this function's (MF) stack pointer stack slot 0(SP).
6137 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6138 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6139
6140 assert(!(IsFastCall && CFlags.IsVarArg) &&
6141 "fastcc not supported on varargs functions");
6142
6143 // Count how many bytes are to be pushed on the stack, including the linkage
6144 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6145 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6146 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6147 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6148 unsigned NumBytes = LinkageSize;
6149 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6150
6151 static const MCPhysReg GPR[] = {
6152 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6153 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6154 };
6155 static const MCPhysReg VR[] = {
6156 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6157 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6158 };
6159
6160 const unsigned NumGPRs = std::size(GPR);
6161 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6162 const unsigned NumVRs = std::size(VR);
6163
6164 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6165 // can be passed to the callee in registers.
6166 // For the fast calling convention, there is another check below.
6167 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6168 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6169 if (!HasParameterArea) {
6170 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6171 unsigned AvailableFPRs = NumFPRs;
6172 unsigned AvailableVRs = NumVRs;
6173 unsigned NumBytesTmp = NumBytes;
6174 for (unsigned i = 0; i != NumOps; ++i) {
6175 if (Outs[i].Flags.isNest()) continue;
6176 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6177 PtrByteSize, LinkageSize, ParamAreaSize,
6178 NumBytesTmp, AvailableFPRs, AvailableVRs))
6179 HasParameterArea = true;
6180 }
6181 }
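// [Annotation, not in the upstream source] For example, on ELFv2 a call to
// 'void f(long, long, double)' with all three arguments named fits in X3, X4
// and F1, so HasParameterArea stays false and NumBytes remains the 32-byte
// linkage size.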
6182
6183 // When using the fast calling convention, we don't provide backing for
6184 // arguments that will be in registers.
6185 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6186
6187 // Avoid allocating parameter area for fastcc functions if all the arguments
6188 // can be passed in the registers.
6189 if (IsFastCall)
6190 HasParameterArea = false;
6191
6192 // Add up all the space actually used.
6193 for (unsigned i = 0; i != NumOps; ++i) {
6194 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6195 EVT ArgVT = Outs[i].VT;
6196 EVT OrigVT = Outs[i].ArgVT;
6197
6198 if (Flags.isNest())
6199 continue;
6200
6201 if (IsFastCall) {
6202 if (Flags.isByVal()) {
6203 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6204 if (NumGPRsUsed > NumGPRs)
6205 HasParameterArea = true;
6206 } else {
6207 switch (ArgVT.getSimpleVT().SimpleTy) {
6208 default: llvm_unreachable("Unexpected ValueType for argument!");
6209 case MVT::i1:
6210 case MVT::i32:
6211 case MVT::i64:
6212 if (++NumGPRsUsed <= NumGPRs)
6213 continue;
6214 break;
6215 case MVT::v4i32:
6216 case MVT::v8i16:
6217 case MVT::v16i8:
6218 case MVT::v2f64:
6219 case MVT::v2i64:
6220 case MVT::v1i128:
6221 case MVT::f128:
6222 if (++NumVRsUsed <= NumVRs)
6223 continue;
6224 break;
6225 case MVT::v4f32:
6226 if (++NumVRsUsed <= NumVRs)
6227 continue;
6228 break;
6229 case MVT::f32:
6230 case MVT::f64:
6231 if (++NumFPRsUsed <= NumFPRs)
6232 continue;
6233 break;
6234 }
6235 HasParameterArea = true;
6236 }
6237 }
6238
6239 /* Respect alignment of argument on the stack. */
6240 auto Alignment =
6241 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6242 NumBytes = alignTo(NumBytes, Alignment);
6243
6244 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6245 if (Flags.isInConsecutiveRegsLast())
6246 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6247 }
6248
6249 unsigned NumBytesActuallyUsed = NumBytes;
6250
6251 // In the old ELFv1 ABI,
6252 // the prolog code of the callee may store up to 8 GPR argument registers to
6253 // the stack, allowing va_start to index over them in memory if it is varargs.
6254 // Because we cannot tell if this is needed on the caller side, we have to
6255 // conservatively assume that it is needed. As such, make sure we have at
6256 // least enough stack space for the caller to store the 8 GPRs.
6257 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6258 // really requires memory operands, e.g. a vararg function.
6259 if (HasParameterArea)
6260 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6261 else
6262 NumBytes = LinkageSize;
6263
6264 // Tail call needs the stack to be aligned.
6265 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6266 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6267
6268 int SPDiff = 0;
6269
6270 // Calculate by how many bytes the stack has to be adjusted in case of tail
6271 // call optimization.
6272 if (!IsSibCall)
6273 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6274
6275 // To protect arguments on the stack from being clobbered in a tail call,
6276 // force all the loads to happen before doing any other lowering.
6277 if (CFlags.IsTailCall)
6278 Chain = DAG.getStackArgumentTokenFactor(Chain);
6279
6280 // Adjust the stack pointer for the new arguments...
6281 // These operations are automatically eliminated by the prolog/epilog pass
6282 if (!IsSibCall)
6283 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6284 SDValue CallSeqStart = Chain;
6285
6286 // Load the return address and frame pointer so they can be moved somewhere
6287 // else later.
6288 SDValue LROp, FPOp;
6289 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6290
6291 // Set up a copy of the stack pointer for use loading and storing any
6292 // arguments that may not fit in the registers available for argument
6293 // passing.
6294 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6295
6296 // Figure out which arguments are going to go in registers, and which in
6297 // memory. Also, if this is a vararg function, floating point operations
6298 // must be stored to our stack, and loaded into integer regs as well, if
6299 // any integer regs are available for argument passing.
6300 unsigned ArgOffset = LinkageSize;
6301
6303 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6304
6305 SmallVector<SDValue, 8> MemOpChains;
6306 for (unsigned i = 0; i != NumOps; ++i) {
6307 SDValue Arg = OutVals[i];
6308 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6309 EVT ArgVT = Outs[i].VT;
6310 EVT OrigVT = Outs[i].ArgVT;
6311
6312 // PtrOff will be used to store the current argument to the stack if a
6313 // register cannot be found for it.
6314 SDValue PtrOff;
6315
6316 // We re-align the argument offset for each argument, except when using the
6317 // fast calling convention, when we need to make sure we do that only when
6318 // we'll actually use a stack slot.
6319 auto ComputePtrOff = [&]() {
6320 /* Respect alignment of argument on the stack. */
6321 auto Alignment =
6322 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6323 ArgOffset = alignTo(ArgOffset, Alignment);
6324
6325 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6326
6327 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6328 };
6329
6330 if (!IsFastCall) {
6331 ComputePtrOff();
6332
6333 /* Compute GPR index associated with argument offset. */
6334 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6335 GPR_idx = std::min(GPR_idx, NumGPRs);
6336 }
6337
6338 // Promote integers to 64-bit values.
6339 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6340 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6341 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6342 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6343 }
6344
6345 // FIXME memcpy is used way more than necessary. Correctness first.
6346 // Note: "by value" is code for passing a structure by value, not
6347 // basic types.
6348 if (Flags.isByVal()) {
6349 // Note: Size includes alignment padding, so
6350 // struct x { short a; char b; }
6351 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6352 // These are the proper values we need for right-justifying the
6353 // aggregate in a parameter register.
6354 unsigned Size = Flags.getByValSize();
6355
6356 // An empty aggregate parameter takes up no storage and no
6357 // registers.
6358 if (Size == 0)
6359 continue;
6360
6361 if (IsFastCall)
6362 ComputePtrOff();
6363
6364 // All aggregates smaller than 8 bytes must be passed right-justified.
6365 if (Size==1 || Size==2 || Size==4) {
6366 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6367 if (GPR_idx != NumGPRs) {
6368 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6369 MachinePointerInfo(), VT);
6370 MemOpChains.push_back(Load.getValue(1));
6371 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6372
6373 ArgOffset += PtrByteSize;
6374 continue;
6375 }
6376 }
6377
6378 if (GPR_idx == NumGPRs && Size < 8) {
6379 SDValue AddPtr = PtrOff;
6380 if (!isLittleEndian) {
6381 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6382 PtrOff.getValueType());
6383 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6384 }
6385 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6386 CallSeqStart,
6387 Flags, DAG, dl);
6388 ArgOffset += PtrByteSize;
6389 continue;
6390 }
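// [Annotation, not in the upstream source] Right-justification example: on
// big-endian, a 3-byte aggregate passed in memory is copied to PtrOff + 5, so
// it occupies the low-order (rightmost) bytes of its 8-byte parameter slot.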
6391 // Copy the object to the parameter save area if it cannot be entirely
6392 // passed in registers.
6393 // FIXME: we only need to copy the parts which need to be passed in
6394 // parameter save area. For the parts passed by registers, we don't need
6395 // to copy them to the stack although we need to allocate space for them
6396 // in parameter save area.
6397 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6398 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6399 CallSeqStart,
6400 Flags, DAG, dl);
6401
6402 // When a register is available, pass a small aggregate right-justified.
6403 if (Size < 8 && GPR_idx != NumGPRs) {
6404 // The easiest way to get this right-justified in a register
6405 // is to copy the structure into the rightmost portion of a
6406 // local variable slot, then load the whole slot into the
6407 // register.
6408 // FIXME: The memcpy seems to produce pretty awful code for
6409 // small aggregates, particularly for packed ones.
6410 // FIXME: It would be preferable to use the slot in the
6411 // parameter save area instead of a new local variable.
6412 SDValue AddPtr = PtrOff;
6413 if (!isLittleEndian) {
6414 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6415 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6416 }
6417 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6418 CallSeqStart,
6419 Flags, DAG, dl);
6420
6421 // Load the slot into the register.
6422 SDValue Load =
6423 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6424 MemOpChains.push_back(Load.getValue(1));
6425 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6426
6427 // Done with this argument.
6428 ArgOffset += PtrByteSize;
6429 continue;
6430 }
6431
6432 // For aggregates larger than PtrByteSize, copy the pieces of the
6433 // object that fit into registers from the parameter save area.
6434 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6435 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6436 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6437 if (GPR_idx != NumGPRs) {
6438 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6439 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6440 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6441 MachinePointerInfo(), ObjType);
6442
6443 MemOpChains.push_back(Load.getValue(1));
6444 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6445 ArgOffset += PtrByteSize;
6446 } else {
6447 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6448 break;
6449 }
6450 }
6451 continue;
6452 }
6453
6454 switch (Arg.getSimpleValueType().SimpleTy) {
6455 default: llvm_unreachable("Unexpected ValueType for argument!");
6456 case MVT::i1:
6457 case MVT::i32:
6458 case MVT::i64:
6459 if (Flags.isNest()) {
6460 // The 'nest' parameter, if any, is passed in R11.
6461 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6462 break;
6463 }
6464
6465 // These can be scalar arguments or elements of an integer array type
6466 // passed directly. Clang may use those instead of "byval" aggregate
6467 // types to avoid forcing arguments to memory unnecessarily.
6468 if (GPR_idx != NumGPRs) {
6469 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6470 } else {
6471 if (IsFastCall)
6472 ComputePtrOff();
6473
6474 assert(HasParameterArea &&
6475 "Parameter area must exist to pass an argument in memory.");
6476 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6477 true, CFlags.IsTailCall, false, MemOpChains,
6478 TailCallArguments, dl);
6479 if (IsFastCall)
6480 ArgOffset += PtrByteSize;
6481 }
6482 if (!IsFastCall)
6483 ArgOffset += PtrByteSize;
6484 break;
6485 case MVT::f32:
6486 case MVT::f64: {
6487 // These can be scalar arguments or elements of a float array type
6488 // passed directly. The latter are used to implement ELFv2 homogeneous
6489 // float aggregates.
6490
6491 // Named arguments go into FPRs first, and once they overflow, the
6492 // remaining arguments go into GPRs and then the parameter save area.
6493 // Unnamed arguments for vararg functions always go to GPRs and
6494 // then the parameter save area. For now, put all arguments to vararg
6495 // routines always in both locations (FPR *and* GPR or stack slot).
6496 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6497 bool NeededLoad = false;
6498
6499 // First load the argument into the next available FPR.
6500 if (FPR_idx != NumFPRs)
6501 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6502
6503 // Next, load the argument into GPR or stack slot if needed.
6504 if (!NeedGPROrStack)
6505 ;
6506 else if (GPR_idx != NumGPRs && !IsFastCall) {
6507 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6508 // once we support fp <-> gpr moves.
6509
6510 // In the non-vararg case, this can only ever happen in the
6511 // presence of f32 array types, since otherwise we never run
6512 // out of FPRs before running out of GPRs.
6513 SDValue ArgVal;
6514
6515 // Double values are always passed in a single GPR.
6516 if (Arg.getValueType() != MVT::f32) {
6517 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6518
6519 // Non-array float values are extended and passed in a GPR.
6520 } else if (!Flags.isInConsecutiveRegs()) {
6521 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6522 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6523
6524 // If we have an array of floats, we collect every odd element
6525 // together with its predecessor into one GPR.
6526 } else if (ArgOffset % PtrByteSize != 0) {
6527 SDValue Lo, Hi;
6528 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6529 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6530 if (!isLittleEndian)
6531 std::swap(Lo, Hi);
6532 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6533
6534 // The final element, if even, goes into the first half of a GPR.
6535 } else if (Flags.isInConsecutiveRegsLast()) {
6536 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6537 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6538 if (!isLittleEndian)
6539 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6540 DAG.getConstant(32, dl, MVT::i32));
6541
6542 // Non-final even elements are skipped; they will be handled together
6543 // with the subsequent argument on the next go-around.
6544 } else
6545 ArgVal = SDValue();
6546
6547 if (ArgVal.getNode())
6548 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6549 } else {
6550 if (IsFastCall)
6551 ComputePtrOff();
6552
6553 // Single-precision floating-point values are mapped to the
6554 // second (rightmost) word of the stack doubleword.
6555 if (Arg.getValueType() == MVT::f32 &&
6556 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6557 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6558 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6559 }
6560
6561 assert(HasParameterArea &&
6562 "Parameter area must exist to pass an argument in memory.");
6563 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6564 true, CFlags.IsTailCall, false, MemOpChains,
6565 TailCallArguments, dl);
6566
6567 NeededLoad = true;
6568 }
6569 // When passing an array of floats, the array occupies consecutive
6570 // space in the argument area; only round up to the next doubleword
6571 // at the end of the array. Otherwise, each float takes 8 bytes.
6572 if (!IsFastCall || NeededLoad) {
6573 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6574 Flags.isInConsecutiveRegs()) ? 4 : 8;
6575 if (Flags.isInConsecutiveRegsLast())
6576 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6577 }
6578 break;
6579 }
6580 case MVT::v4f32:
6581 case MVT::v4i32:
6582 case MVT::v8i16:
6583 case MVT::v16i8:
6584 case MVT::v2f64:
6585 case MVT::v2i64:
6586 case MVT::v1i128:
6587 case MVT::f128:
6588 // These can be scalar arguments or elements of a vector array type
6589 // passed directly. The latter are used to implement ELFv2 homogeneous
6590 // vector aggregates.
6591
6592 // For a varargs call, named arguments go into VRs or on the stack as
6593 // usual; unnamed arguments always go to the stack or the corresponding
6594 // GPRs when within range. For now, we always put the value in both
6595 // locations (or even all three).
6596 if (CFlags.IsVarArg) {
6597 assert(HasParameterArea &&
6598 "Parameter area must exist if we have a varargs call.");
6599 // We could elide this store in the case where the object fits
6600 // entirely in R registers. Maybe later.
6601 SDValue Store =
6602 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6603 MemOpChains.push_back(Store);
6604 if (VR_idx != NumVRs) {
6605 SDValue Load =
6606 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6607 MemOpChains.push_back(Load.getValue(1));
6608 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6609 }
6610 ArgOffset += 16;
6611 for (unsigned i=0; i<16; i+=PtrByteSize) {
6612 if (GPR_idx == NumGPRs)
6613 break;
6614 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6615 DAG.getConstant(i, dl, PtrVT));
6616 SDValue Load =
6617 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6618 MemOpChains.push_back(Load.getValue(1));
6619 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6620 }
6621 break;
6622 }
6623
6624 // Non-varargs Altivec params go into VRs or on the stack.
6625 if (VR_idx != NumVRs) {
6626 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6627 } else {
6628 if (IsFastCall)
6629 ComputePtrOff();
6630
6631 assert(HasParameterArea &&
6632 "Parameter area must exist to pass an argument in memory.");
6633 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6634 true, CFlags.IsTailCall, true, MemOpChains,
6635 TailCallArguments, dl);
6636 if (IsFastCall)
6637 ArgOffset += 16;
6638 }
6639
6640 if (!IsFastCall)
6641 ArgOffset += 16;
6642 break;
6643 }
6644 }
6645
6646 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6647 "mismatch in size of parameter area");
6648 (void)NumBytesActuallyUsed;
6649
6650 if (!MemOpChains.empty())
6651 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6652
6653 // Check if this is an indirect call (MTCTR/BCTRL).
6654 // See prepareDescriptorIndirectCall and buildCallOperands for more
6655 // information about calls through function pointers in the 64-bit SVR4 ABI.
6656 if (CFlags.IsIndirect) {
6657 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6658 // caller in the TOC save area.
6659 if (isTOCSaveRestoreRequired(Subtarget)) {
6660 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6661 // Load r2 into a virtual register and store it to the TOC save area.
6662 setUsesTOCBasePtr(DAG);
6663 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6664 // TOC save area offset.
6665 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6666 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6667 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6668 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6669 MachinePointerInfo::getStack(
6670 DAG.getMachineFunction(), TOCSaveOffset));
6671 }
6672 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6673 // This does not mean the MTCTR instruction must use R12; it's easier
6674 // to model this as an extra parameter, so do that.
6675 if (isELFv2ABI && !CFlags.IsPatchPoint)
6676 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6677 }
6678
6679 // Build a sequence of copy-to-reg nodes chained together with token chain
6680 // and flag operands which copy the outgoing args into the appropriate regs.
6681 SDValue InGlue;
6682 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6683 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6684 RegsToPass[i].second, InGlue);
6685 InGlue = Chain.getValue(1);
6686 }
6687
6688 if (CFlags.IsTailCall && !IsSibCall)
6689 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6690 TailCallArguments);
6691
6692 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6693 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6694}
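
To make the float-array bookkeeping in LowerCall_64SVR4 above concrete: f32 elements of a consecutive-regs array advance ArgOffset by 4 apiece and round up only at the end, while standalone f32s take a full doubleword each. A minimal standalone sketch of that arithmetic (an illustration, not part of this file):

#include <cstdio>

int main() {
  unsigned ArgOffset = 0;
  const unsigned PtrByteSize = 8;
  for (int i = 0; i < 3; ++i) {
    ArgOffset += 4;                  // f32 inside a consecutive-regs array
    if (i == 2)                      // isInConsecutiveRegsLast
      ArgOffset = (ArgOffset + PtrByteSize - 1) / PtrByteSize * PtrByteSize;
  }
  std::printf("%u\n", ArgOffset);    // prints 16; three lone f32s would use 24
}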
6695
6696// Returns true when the shadow of a general purpose argument register
6697// in the parameter save area is aligned to at least 'RequiredAlign'.
6698static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6699 assert(RequiredAlign.value() <= 16 &&
6700 "Required alignment greater than stack alignment.");
6701 switch (Reg) {
6702 default:
6703 report_fatal_error("called on invalid register.");
6704 case PPC::R5:
6705 case PPC::R9:
6706 case PPC::X3:
6707 case PPC::X5:
6708 case PPC::X7:
6709 case PPC::X9:
6710 // These registers are 16-byte aligned, which is the strictest alignment
6711 // we can support.
6712 return true;
6713 case PPC::R3:
6714 case PPC::R7:
6715 case PPC::X4:
6716 case PPC::X6:
6717 case PPC::X8:
6718 case PPC::X10:
6719 // The shadow of these registers in the PSA is 8 byte aligned.
6720 return RequiredAlign <= 8;
6721 case PPC::R4:
6722 case PPC::R6:
6723 case PPC::R8:
6724 case PPC::R10:
6725 return RequiredAlign <= 4;
6726 }
6727}
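
The switch above encodes where each argument GPR's shadow lands in the parameter save area. A standalone sketch of that arithmetic, assuming the usual AIX linkage sizes of 24 bytes (32-bit) and 48 bytes (64-bit); shadowOffset is a hypothetical helper, not an LLVM API:

#include <cassert>

// PSA shadow offset of the Nth argument GPR (R3+N on 32-bit, X3+N on 64-bit).
static unsigned shadowOffset(unsigned N, bool IsPPC64) {
  const unsigned LinkageSize = IsPPC64 ? 48 : 24;
  return LinkageSize + (IsPPC64 ? 8 : 4) * N;
}

int main() {
  assert(shadowOffset(2, false) % 16 == 0); // R5 shadows offset 32: 16-aligned
  assert(shadowOffset(0, false) % 8 == 0);  // R3 shadows offset 24: 8-aligned
  assert(shadowOffset(1, false) % 8 == 4);  // R4 shadows offset 28: 4-aligned
  assert(shadowOffset(1, true) % 16 == 8);  // X4 shadows offset 56: 8-aligned
  assert(shadowOffset(2, true) % 16 == 0);  // X5 shadows offset 64: 16-aligned
}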
6728
6729static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6730 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6731 CCState &S) {
6732 AIXCCState &State = static_cast<AIXCCState &>(S);
6733 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6734 State.getMachineFunction().getSubtarget());
6735 const bool IsPPC64 = Subtarget.isPPC64();
6736 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6737 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6738
6739 if (ValVT == MVT::f128)
6740 report_fatal_error("f128 is unimplemented on AIX.");
6741
6742 if (ArgFlags.isNest())
6743 report_fatal_error("Nest arguments are unimplemented.");
6744
6745 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6746 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6747 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6748 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6749 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6750 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6751
6752 static const MCPhysReg VR[] = {// Vector registers.
6753 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6754 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6755 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6756
6757 if (ArgFlags.isByVal()) {
6758 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6759 report_fatal_error("Pass-by-value arguments with alignment greater than "
6760 "register width are not supported.");
6761
6762 const unsigned ByValSize = ArgFlags.getByValSize();
6763
6764 // An empty aggregate parameter takes up no storage and no registers,
6765 // but needs a MemLoc for a stack slot for the formal arguments side.
6766 if (ByValSize == 0) {
6767 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6768 State.getStackSize(), RegVT, LocInfo));
6769 return false;
6770 }
6771
6772 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6773 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6774 for (const unsigned E = Offset + StackSize; Offset < E;
6775 Offset += PtrAlign.value()) {
6776 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6777 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6778 else {
6779 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6780 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6781 LocInfo));
6782 break;
6783 }
6784 }
6785 return false;
6786 }
6787
6788 // Arguments always reserve parameter save area.
6789 switch (ValVT.SimpleTy) {
6790 default:
6791 report_fatal_error("Unhandled value type for argument.");
6792 case MVT::i64:
6793 // i64 arguments should have been split to i32 for PPC32.
6794 assert(IsPPC64 && "PPC32 should have split i64 values.");
6795 [[fallthrough]];
6796 case MVT::i1:
6797 case MVT::i32: {
6798 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6799 // AIX integer arguments are always passed in register width.
6800 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6801 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6802 : CCValAssign::LocInfo::ZExt;
6803 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6804 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6805 else
6806 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6807
6808 return false;
6809 }
6810 case MVT::f32:
6811 case MVT::f64: {
6812 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6813 const unsigned StoreSize = LocVT.getStoreSize();
6814 // Floats are always 4-byte aligned in the PSA on AIX.
6815 // This includes f64 in 64-bit mode for ABI compatibility.
6816 const unsigned Offset =
6817 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6818 unsigned FReg = State.AllocateReg(FPR);
6819 if (FReg)
6820 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6821
6822 // Reserve and initialize GPRs or initialize the PSA as required.
6823 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6824 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6825 assert(FReg && "An FPR should be available when a GPR is reserved.");
6826 if (State.isVarArg()) {
6827 // Successfully reserved GPRs are only initialized for vararg calls.
6828 // Custom handling is required for:
6829 // f64 in PPC32 needs to be split into 2 GPRs.
6830 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6831 State.addLoc(
6832 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6833 }
6834 } else {
6835 // If there are insufficient GPRs, the PSA needs to be initialized.
6836 // Initialization occurs even if an FPR was initialized for
6837 // compatibility with the AIX XL compiler. The full memory for the
6838 // argument will be initialized even if a prior word is saved in GPR.
6839 // A custom memLoc is used when the argument also passes in FPR so
6840 // that the callee handling can skip over it easily.
6841 State.addLoc(
6842 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6843 LocInfo)
6844 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6845 break;
6846 }
6847 }
6848
6849 return false;
6850 }
6851 case MVT::v4f32:
6852 case MVT::v4i32:
6853 case MVT::v8i16:
6854 case MVT::v16i8:
6855 case MVT::v2i64:
6856 case MVT::v2f64:
6857 case MVT::v1i128: {
6858 const unsigned VecSize = 16;
6859 const Align VecAlign(VecSize);
6860
6861 if (!State.isVarArg()) {
6862 // If there are vector registers remaining we don't consume any stack
6863 // space.
6864 if (unsigned VReg = State.AllocateReg(VR)) {
6865 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6866 return false;
6867 }
6868 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6869 // might be allocated in the portion of the PSA that is shadowed by the
6870 // GPRs.
6871 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6872 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6873 return false;
6874 }
6875
6876 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6877 ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6878
6879 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6880 // Burn any underaligned registers and their shadowed stack space until
6881 // we reach the required alignment.
6882 while (NextRegIndex != GPRs.size() &&
6883 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6884 // Shadow allocate register and its stack shadow.
6885 unsigned Reg = State.AllocateReg(GPRs);
6886 State.AllocateStack(PtrSize, PtrAlign);
6887 assert(Reg && "Allocating register unexpectedly failed.");
6888 (void)Reg;
6889 NextRegIndex = State.getFirstUnallocated(GPRs);
6890 }
6891
6892 // Vectors that are passed as fixed arguments are handled differently.
6893 // They are passed in VRs if any are available (unlike arguments passed
6894 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6895 // functions).
6896 if (State.isFixed(ValNo)) {
6897 if (unsigned VReg = State.AllocateReg(VR)) {
6898 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6899 // Shadow allocate GPRs and stack space even though we pass in a VR.
6900 for (unsigned I = 0; I != VecSize; I += PtrSize)
6901 State.AllocateReg(GPRs);
6902 State.AllocateStack(VecSize, VecAlign);
6903 return false;
6904 }
6905 // No vector registers remain so pass on the stack.
6906 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6907 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6908 return false;
6909 }
6910
6911 // If all GPRs are consumed then we pass the argument fully on the stack.
6912 if (NextRegIndex == GPRs.size()) {
6913 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6914 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6915 return false;
6916 }
6917
6918 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6919 // half of the argument, and then need to pass the remaining half on the
6920 // stack.
6921 if (GPRs[NextRegIndex] == PPC::R9) {
6922 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6923 State.addLoc(
6924 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6925
6926 const unsigned FirstReg = State.AllocateReg(PPC::R9);
6927 const unsigned SecondReg = State.AllocateReg(PPC::R10);
6928 assert(FirstReg && SecondReg &&
6929 "Allocating R9 or R10 unexpectedly failed.");
6930 State.addLoc(
6931 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6932 State.addLoc(
6933 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6934 return false;
6935 }
6936
6937 // We have enough GPRs to fully pass the vector argument, and we have
6938 // already consumed any underaligned registers. Start with the custom
6939 // MemLoc and then the custom RegLocs.
6940 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6941 State.addLoc(
6942 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6943 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6944 const unsigned Reg = State.AllocateReg(GPRs);
6945 assert(Reg && "Failed to allocate register for vararg vector argument");
6946 State.addLoc(
6947 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6948 }
6949 return false;
6950 }
6951 }
6952 return true;
6953}
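
As a worked trace of the vararg-vector path above, assume 32-bit AIX (linkage area of 24 bytes, R3..R10 shadowing PSA offsets 24..55) and a variadic call whose one fixed argument is a pointer: the pointer takes R3, R4 is burned because its shadow at offset 28 is under-aligned, and the vector then gets a custom MemLoc at offset 32 plus four custom RegLocs in R5..R8. A standalone model of the burn loop (illustration only):

#include <cstdio>

int main() {
  unsigned StackOffset = 24; // linkage area already allocated (32-bit AIX)
  unsigned NextGPR = 3;      // R3 is the first argument GPR
  ++NextGPR; StackOffset += 4;  // a fixed pointer arg consumes R3 + its shadow
  // Burn GPRs whose PSA shadow is not 16-byte aligned (here just R4 at 28).
  while (StackOffset % 16 != 0) { ++NextGPR; StackOffset += 4; }
  std::printf("MemLoc at offset %u, RegLocs start at R%u\n",
              StackOffset, NextGPR); // -> offset 32, R5 (then R6, R7, R8)
}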
6954
6955 // So far, this function is only used by LowerFormalArguments_AIX().
6956 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6957 bool IsPPC64,
6958 bool HasP8Vector,
6959 bool HasVSX) {
6960 assert((IsPPC64 || SVT != MVT::i64) &&
6961 "i64 should have been split for 32-bit codegen.");
6962
6963 switch (SVT) {
6964 default:
6965 report_fatal_error("Unexpected value type for formal argument");
6966 case MVT::i1:
6967 case MVT::i32:
6968 case MVT::i64:
6969 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6970 case MVT::f32:
6971 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6972 case MVT::f64:
6973 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6974 case MVT::v4f32:
6975 case MVT::v4i32:
6976 case MVT::v8i16:
6977 case MVT::v16i8:
6978 case MVT::v2i64:
6979 case MVT::v2f64:
6980 case MVT::v1i128:
6981 return &PPC::VRRCRegClass;
6982 }
6983}
6984
6985 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6986 SelectionDAG &DAG, SDValue ArgValue,
6987 MVT LocVT, const SDLoc &dl) {
6988 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6989 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6990
6991 if (Flags.isSExt())
6992 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6993 DAG.getValueType(ValVT));
6994 else if (Flags.isZExt())
6995 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6996 DAG.getValueType(ValVT));
6997
6998 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6999}
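
In plain terms, the callee receives an integer widened to register width, tags what it knows about the upper bits (AssertSext/AssertZext), and truncates back. A rough scalar analogy, assuming a sign-extended i32 argument arriving in a 64-bit GPR (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  int64_t Loc = -7;                        // caller sign-extended i32 to i64
  int32_t Val = static_cast<int32_t>(Loc); // AssertSext + TRUNCATE on our side
  assert(Val == -7);                       // the original value survives
}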
7000
7001static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7002 const unsigned LASize = FL->getLinkageSize();
7003
7004 if (PPC::GPRCRegClass.contains(Reg)) {
7005 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7006 "Reg must be a valid argument register!");
7007 return LASize + 4 * (Reg - PPC::R3);
7008 }
7009
7010 if (PPC::G8RCRegClass.contains(Reg)) {
7011 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7012 "Reg must be a valid argument register!");
7013 return LASize + 8 * (Reg - PPC::X3);
7014 }
7015
7016 llvm_unreachable("Only general purpose registers expected.");
7017}
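
For example, with the 64-bit linkage size of 48 bytes, X5 maps to 48 + 8 * (X5 - X3) = 64; on 32-bit, with a 24-byte linkage area, R7 maps to 24 + 4 * 4 = 40. A compile-time check of that arithmetic, writing the register distances as plain integers (illustration only):

static_assert(48 + 8 * (5 - 3) == 64, "X5 shadows PSA offset 64 on 64-bit AIX");
static_assert(24 + 4 * (7 - 3) == 40, "R7 shadows PSA offset 40 on 32-bit AIX");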
7018
7019// AIX ABI Stack Frame Layout:
7020//
7021// Low Memory +--------------------------------------------+
7022// SP +---> | Back chain | ---+
7023// | +--------------------------------------------+ |
7024// | | Saved Condition Register | |
7025// | +--------------------------------------------+ |
7026// | | Saved Linkage Register | |
7027// | +--------------------------------------------+ | Linkage Area
7028// | | Reserved for compilers | |
7029// | +--------------------------------------------+ |
7030// | | Reserved for binders | |
7031// | +--------------------------------------------+ |
7032// | | Saved TOC pointer | ---+
7033// | +--------------------------------------------+
7034// | | Parameter save area |
7035// | +--------------------------------------------+
7036// | | Alloca space |
7037// | +--------------------------------------------+
7038// | | Local variable space |
7039// | +--------------------------------------------+
7040// | | Float/int conversion temporary |
7041// | +--------------------------------------------+
7042// | | Save area for AltiVec registers |
7043// | +--------------------------------------------+
7044// | | AltiVec alignment padding |
7045// | +--------------------------------------------+
7046// | | Save area for VRSAVE register |
7047// | +--------------------------------------------+
7048// | | Save area for General Purpose registers |
7049// | +--------------------------------------------+
7050// | | Save area for Floating Point registers |
7051// | +--------------------------------------------+
7052// +---- | Back chain |
7053// High Memory +--------------------------------------------+
7054//
7055// Specifications:
7056// AIX 7.2 Assembler Language Reference
7057// Subroutine linkage convention
7058
7059SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7060 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7061 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7062 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7063
7064 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7065 CallConv == CallingConv::Fast) &&
7066 "Unexpected calling convention!");
7067
7068 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7069 report_fatal_error("Tail call support is unimplemented on AIX.");
7070
7071 if (useSoftFloat())
7072 report_fatal_error("Soft float support is unimplemented on AIX.");
7073
7074 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7075
7076 const bool IsPPC64 = Subtarget.isPPC64();
7077 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7078
7079 // Assign locations to all of the incoming arguments.
7080 SmallVector<CCValAssign, 16> ArgLocs;
7081 MachineFunction &MF = DAG.getMachineFunction();
7082 MachineFrameInfo &MFI = MF.getFrameInfo();
7083 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7084 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7085
7086 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7087 // Reserve space for the linkage area on the stack.
7088 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7089 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7090 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7091
7091
7092 SmallVector<SDValue, 8> MemOps;
7093
7094 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7095 CCValAssign &VA = ArgLocs[I++];
7096 MVT LocVT = VA.getLocVT();
7097 MVT ValVT = VA.getValVT();
7098 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7099 // For compatibility with the AIX XL compiler, the float args in the
7100 // parameter save area are initialized even if the argument is available
7101 // in register. The caller is required to initialize both the register
7102 // and memory, however, the callee can choose to expect it in either.
7103 // The memloc is dismissed here because the argument is retrieved from
7104 // the register.
7105 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7106 continue;
7107
7108 auto HandleMemLoc = [&]() {
7109 const unsigned LocSize = LocVT.getStoreSize();
7110 const unsigned ValSize = ValVT.getStoreSize();
7111 assert((ValSize <= LocSize) &&
7112 "Object size is larger than size of MemLoc");
7113 int CurArgOffset = VA.getLocMemOffset();
7114 // Objects are right-justified because AIX is big-endian.
7115 if (LocSize > ValSize)
7116 CurArgOffset += LocSize - ValSize;
7117 // Potential tail calls could cause overwriting of argument stack slots.
7118 const bool IsImmutable =
7119 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7120 (CallConv == CallingConv::Fast));
7121 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7122 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7123 SDValue ArgValue =
7124 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7125 InVals.push_back(ArgValue);
7126 };
7127
7128 // Vector arguments to VaArg functions are passed both on the stack and
7129 // in any available GPRs. Load the value from the stack and add the GPRs
7130 // as live ins.
7131 if (VA.isMemLoc() && VA.needsCustom()) {
7132 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7133 assert(isVarArg && "Only use custom memloc for vararg.");
7134 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7135 // matching custom RegLocs.
7136 const unsigned OriginalValNo = VA.getValNo();
7137 (void)OriginalValNo;
7138
7139 auto HandleCustomVecRegLoc = [&]() {
7140 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7141 "Missing custom RegLoc.");
7142 VA = ArgLocs[I++];
7143 assert(VA.getValVT().isVector() &&
7144 "Unexpected Val type for custom RegLoc.");
7145 assert(VA.getValNo() == OriginalValNo &&
7146 "ValNo mismatch between custom MemLoc and RegLoc.");
7148 MF.addLiveIn(VA.getLocReg(),
7149 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7150 Subtarget.hasVSX()));
7151 };
7152
7153 HandleMemLoc();
7154 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7155 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7156 // R10.
7157 HandleCustomVecRegLoc();
7158 HandleCustomVecRegLoc();
7159
7160 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7161 // we passed the vector in R5, R6, R7 and R8.
7162 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7163 assert(!IsPPC64 &&
7164 "Only 2 custom RegLocs expected for 64-bit codegen.");
7165 HandleCustomVecRegLoc();
7166 HandleCustomVecRegLoc();
7167 }
7168
7169 continue;
7170 }
7171
7172 if (VA.isRegLoc()) {
7173 if (VA.getValVT().isScalarInteger())
7174 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7175 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7176 switch (VA.getValVT().SimpleTy) {
7177 default:
7178 report_fatal_error("Unhandled value type for argument.");
7179 case MVT::f32:
7180 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7181 break;
7182 case MVT::f64:
7183 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7184 break;
7185 }
7186 } else if (VA.getValVT().isVector()) {
7187 switch (VA.getValVT().SimpleTy) {
7188 default:
7189 report_fatal_error("Unhandled value type for argument.");
7190 case MVT::v16i8:
7191 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7192 break;
7193 case MVT::v8i16:
7194 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7195 break;
7196 case MVT::v4i32:
7197 case MVT::v2i64:
7198 case MVT::v1i128:
7199 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7200 break;
7201 case MVT::v4f32:
7202 case MVT::v2f64:
7203 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7204 break;
7205 }
7206 }
7207 }
7208
7209 if (Flags.isByVal() && VA.isMemLoc()) {
7210 const unsigned Size =
7211 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7212 PtrByteSize);
7213 const int FI = MF.getFrameInfo().CreateFixedObject(
7214 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7215 /* IsAliased */ true);
7216 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7217 InVals.push_back(FIN);
7218
7219 continue;
7220 }
7221
7222 if (Flags.isByVal()) {
7223 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7224
7225 const MCPhysReg ArgReg = VA.getLocReg();
7226 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7227
7228 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7229 report_fatal_error("Over aligned byvals not supported yet.");
7230
7231 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7232 const int FI = MF.getFrameInfo().CreateFixedObject(
7233 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7234 /* IsAliased */ true);
7235 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7236 InVals.push_back(FIN);
7237
7238 // Add live ins for all the RegLocs for the same ByVal.
7239 const TargetRegisterClass *RegClass =
7240 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7241
7242 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7243 unsigned Offset) {
7244 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7245 // Since the caller's side has left-justified the aggregate in the
7246 // register, we can simply store the entire register into the stack
7247 // slot.
7248 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7249 // The store to the fixedstack object is needed because accessing a
7250 // field of the ByVal will use a gep and load. Ideally we will optimize
7251 // to extracting the value from the register directly, and elide the
7252 // stores when the argument's address is not taken, but that will need to
7253 // be future work.
7254 SDValue Store = DAG.getStore(
7255 CopyFrom.getValue(1), dl, CopyFrom,
7256 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7257 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7258
7259 MemOps.push_back(Store);
7260 };
7261
7262 unsigned Offset = 0;
7263 HandleRegLoc(VA.getLocReg(), Offset);
7264 Offset += PtrByteSize;
7265 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7266 Offset += PtrByteSize) {
7267 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7268 "RegLocs should be for ByVal argument.");
7269
7270 const CCValAssign RL = ArgLocs[I++];
7271 HandleRegLoc(RL.getLocReg(), Offset);
7273 }
7274
7275 if (Offset != StackSize) {
7276 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7277 "Expected MemLoc for remaining bytes.");
7278 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7279 // Consume the MemLoc. The InVal has already been emitted, so nothing
7280 // more needs to be done.
7281 ++I;
7282 }
7283
7284 continue;
7285 }
7286
7287 if (VA.isRegLoc() && !VA.needsCustom()) {
7288 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7289 Register VReg =
7290 MF.addLiveIn(VA.getLocReg(),
7291 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7292 Subtarget.hasVSX()));
7293 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7294 if (ValVT.isScalarInteger() &&
7295 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7296 ArgValue =
7297 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7298 }
7299 InVals.push_back(ArgValue);
7300 continue;
7301 }
7302 if (VA.isMemLoc()) {
7303 HandleMemLoc();
7304 continue;
7305 }
7306 }
7307
7308 // On AIX a minimum of 8 words is saved to the parameter save area.
7309 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7310 // Area that is at least reserved in the caller of this function.
7311 unsigned CallerReservedArea = std::max<unsigned>(
7312 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7313
7314 // Set the size that is at least reserved in the caller of this function.
7315 // A tail-call-optimized function's reserved stack space needs to be
7316 // aligned so that taking the difference between two stack areas will
7317 // result in an aligned stack.
7318 CallerReservedArea =
7319 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7320 FuncInfo->setMinReservedArea(CallerReservedArea);
7321
7322 if (isVarArg) {
7323 FuncInfo->setVarArgsFrameIndex(
7324 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7325 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7326
7327 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7328 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7329
7330 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7331 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7332 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7333
7334 // The fixed integer arguments of a variadic function are stored to the
7335 // VarArgsFrameIndex on the stack so that they may be loaded by
7336 // dereferencing the result of va_next.
7337 for (unsigned GPRIndex =
7338 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7339 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7340
7341 const Register VReg =
7342 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7343 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7344
7345 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7346 SDValue Store =
7347 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7348 MemOps.push_back(Store);
7349 // Increment the address for the next argument to store.
7350 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7351 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7352 }
7353 }
7354
7355 if (!MemOps.empty())
7356 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7357
7358 return Chain;
7359}
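
Concretely, the floor computed above is LinkageSize plus eight GPR-width words: 48 + 64 = 112 bytes on 64-bit AIX and 24 + 32 = 56 bytes on 32-bit. A compile-time check (illustration only):

static_assert(48 + 8 * 8 == 112, "64-bit AIX minimum reserved area");
static_assert(24 + 8 * 4 == 56, "32-bit AIX minimum reserved area");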
7360
7361SDValue PPCTargetLowering::LowerCall_AIX(
7362 SDValue Chain, SDValue Callee, CallFlags CFlags,
7364 const SmallVectorImpl<SDValue> &OutVals,
7365 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7367 const CallBase *CB) const {
7368 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7369 // AIX ABI stack frame layout.
7370
7371 assert((CFlags.CallConv == CallingConv::C ||
7372 CFlags.CallConv == CallingConv::Cold ||
7373 CFlags.CallConv == CallingConv::Fast) &&
7374 "Unexpected calling convention!");
7375
7376 if (CFlags.IsPatchPoint)
7377 report_fatal_error("This call type is unimplemented on AIX.");
7378
7379 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7380
7381 MachineFunction &MF = DAG.getMachineFunction();
7382 SmallVector<CCValAssign, 16> ArgLocs;
7383 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7384 *DAG.getContext());
7385
7386 // Reserve space for the linkage save area (LSA) on the stack.
7387 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7388 // [SP][CR][LR][2 x reserved][TOC].
7389 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7390 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7391 const bool IsPPC64 = Subtarget.isPPC64();
7392 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7393 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7394 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7395 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7396
7397 // The prolog code of the callee may store up to 8 GPR argument registers to
7398 // the stack, allowing va_start to index over them in memory if the callee
7399 // is variadic.
7400 // Because we cannot tell if this is needed on the caller side, we have to
7401 // conservatively assume that it is needed. As such, make sure we have at
7402 // least enough stack space for the caller to store the 8 GPRs.
7403 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7404 const unsigned NumBytes = std::max<unsigned>(
7405 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7406
7407 // Adjust the stack pointer for the new arguments...
7408 // These operations are automatically eliminated by the prolog/epilog pass.
7409 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7410 SDValue CallSeqStart = Chain;
7411
7412 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7413 SmallVector<SDValue, 8> MemOpChains;
7414
7415 // Set up a copy of the stack pointer for loading and storing any
7416 // arguments that may not fit in the registers available for argument
7417 // passing.
7418 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7419 : DAG.getRegister(PPC::R1, MVT::i32);
7420
7421 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7422 const unsigned ValNo = ArgLocs[I].getValNo();
7423 SDValue Arg = OutVals[ValNo];
7424 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7425
7426 if (Flags.isByVal()) {
7427 const unsigned ByValSize = Flags.getByValSize();
7428
7429 // Nothing to do for zero-sized ByVals on the caller side.
7430 if (!ByValSize) {
7431 ++I;
7432 continue;
7433 }
7434
7435 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7436 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7437 (LoadOffset != 0)
7438 ? DAG.getObjectPtrOffset(
7439 dl, Arg, TypeSize::getFixed(LoadOffset))
7440 : Arg,
7441 MachinePointerInfo(), VT);
7442 };
7443
7444 unsigned LoadOffset = 0;
7445
7446 // Initialize registers, which are fully occupied by the by-val argument.
7447 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7448 SDValue Load = GetLoad(PtrVT, LoadOffset);
7449 MemOpChains.push_back(Load.getValue(1));
7450 LoadOffset += PtrByteSize;
7451 const CCValAssign &ByValVA = ArgLocs[I++];
7452 assert(ByValVA.getValNo() == ValNo &&
7453 "Unexpected location for pass-by-value argument.");
7454 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7455 }
7456
7457 if (LoadOffset == ByValSize)
7458 continue;
7459
7460 // There must be one more loc to handle the remainder.
7461 assert(ArgLocs[I].getValNo() == ValNo &&
7462 "Expected additional location for by-value argument.");
7463
7464 if (ArgLocs[I].isMemLoc()) {
7465 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7466 const CCValAssign &ByValVA = ArgLocs[I++];
7467 ISD::ArgFlagsTy MemcpyFlags = Flags;
7468 // Only memcpy the bytes that don't pass in register.
7469 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7470 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7471 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7472 dl, Arg, TypeSize::getFixed(LoadOffset))
7473 : Arg,
7474 DAG.getObjectPtrOffset(
7475 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7476 CallSeqStart, MemcpyFlags, DAG, dl);
7477 continue;
7478 }
7479
7480 // Initialize the final register residue.
7481 // Any residue that occupies the final by-val arg register must be
7482 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7483 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7484 // 2 and 1 byte loads.
7485 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7486 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7487 "Unexpected register residue for by-value argument.");
7488 SDValue ResidueVal;
7489 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7490 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7491 const MVT VT =
7492 N == 1 ? MVT::i8
7493 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7494 SDValue Load = GetLoad(VT, LoadOffset);
7495 MemOpChains.push_back(Load.getValue(1));
7496 LoadOffset += N;
7497 Bytes += N;
7498
7499 // By-val arguments are passed left-justified in registers.
7500 // Every load here needs to be shifted, otherwise a full register load
7501 // should have been used.
7502 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7503 "Unexpected load emitted during handling of pass-by-value "
7504 "argument.");
7505 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7506 EVT ShiftAmountTy =
7507 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7508 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7509 SDValue ShiftedLoad =
7510 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7511 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7512 ShiftedLoad)
7513 : ShiftedLoad;
7514 }
7515
7516 const CCValAssign &ByValVA = ArgLocs[I++];
7517 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7518 continue;
7519 }
7520
7521 CCValAssign &VA = ArgLocs[I++];
7522 const MVT LocVT = VA.getLocVT();
7523 const MVT ValVT = VA.getValVT();
7524
7525 switch (VA.getLocInfo()) {
7526 default:
7527 report_fatal_error("Unexpected argument extension type.");
7528 case CCValAssign::Full:
7529 break;
7530 case CCValAssign::ZExt:
7531 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7532 break;
7533 case CCValAssign::SExt:
7534 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7535 break;
7536 }
7537
7538 if (VA.isRegLoc() && !VA.needsCustom()) {
7539 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7540 continue;
7541 }
7542
7543 // Vector arguments passed to VarArg functions need custom handling when
7544 // they are passed (at least partially) in GPRs.
7545 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7546 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7547 // Store value to its stack slot.
7548 SDValue PtrOff =
7549 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7550 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7551 SDValue Store =
7552 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7553 MemOpChains.push_back(Store);
7554 const unsigned OriginalValNo = VA.getValNo();
7555 // Then load the GPRs from the stack
7556 unsigned LoadOffset = 0;
7557 auto HandleCustomVecRegLoc = [&]() {
7558 assert(I != E && "Unexpected end of CCvalAssigns.");
7559 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7560 "Expected custom RegLoc.");
7561 CCValAssign RegVA = ArgLocs[I++];
7562 assert(RegVA.getValNo() == OriginalValNo &&
7563 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7564 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7565 DAG.getConstant(LoadOffset, dl, PtrVT));
7566 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7567 MemOpChains.push_back(Load.getValue(1));
7568 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7569 LoadOffset += PtrByteSize;
7570 };
7571
7572 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7573 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7574 // R10.
7575 HandleCustomVecRegLoc();
7576 HandleCustomVecRegLoc();
7577
7578 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7579 ArgLocs[I].getValNo() == OriginalValNo) {
7580 assert(!IsPPC64 &&
7581 "Only 2 custom RegLocs expected for 64-bit codegen.");
7582 HandleCustomVecRegLoc();
7583 HandleCustomVecRegLoc();
7584 }
7585
7586 continue;
7587 }
7588
7589 if (VA.isMemLoc()) {
7590 SDValue PtrOff =
7591 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7592 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7593 MemOpChains.push_back(
7594 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7595
7596 continue;
7597 }
7598
7599 if (!ValVT.isFloatingPoint())
7600 report_fatal_error(
7601 "Unexpected register handling for calling convention.");
7602
7603 // Custom handling is used for GPR initializations for vararg float
7604 // arguments.
7605 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7606 LocVT.isInteger() &&
7607 "Custom register handling only expected for VarArg.");
7608
7609 SDValue ArgAsInt =
7610 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7611
7612 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7613 // f32 in 32-bit GPR
7614 // f64 in 64-bit GPR
7615 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7616 else if (Arg.getValueType().getFixedSizeInBits() <
7617 LocVT.getFixedSizeInBits())
7618 // f32 in 64-bit GPR.
7619 RegsToPass.push_back(std::make_pair(
7620 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7621 else {
7622 // f64 in two 32-bit GPRs
7623 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7624 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7625 "Unexpected custom register for argument!");
7626 CCValAssign &GPR1 = VA;
7627 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7628 DAG.getConstant(32, dl, MVT::i8));
7629 RegsToPass.push_back(std::make_pair(
7630 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7631
7632 if (I != E) {
7633 // If only 1 GPR was available, there will only be one custom GPR and
7634 // the argument will also pass in memory.
7635 CCValAssign &PeekArg = ArgLocs[I];
7636 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7637 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7638 CCValAssign &GPR2 = ArgLocs[I++];
7639 RegsToPass.push_back(std::make_pair(
7640 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7641 }
7642 }
7643 }
7644 }
7645
7646 if (!MemOpChains.empty())
7647 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7648
7649 // For indirect calls, we need to save the TOC base to the stack for
7650 // restoration after the call.
7651 if (CFlags.IsIndirect) {
7652 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7653 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7654 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7655 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7656 const unsigned TOCSaveOffset =
7657 Subtarget.getFrameLowering()->getTOCSaveOffset();
7658
7659 setUsesTOCBasePtr(DAG);
7660 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7661 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7662 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7663 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7664 Chain = DAG.getStore(
7665 Val.getValue(1), dl, Val, AddPtr,
7666 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7667 }
7668
7669 // Build a sequence of copy-to-reg nodes chained together with token chain
7670 // and flag operands which copy the outgoing args into the appropriate regs.
7671 SDValue InGlue;
7672 for (auto Reg : RegsToPass) {
7673 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7674 InGlue = Chain.getValue(1);
7675 }
7676
7677 const int SPDiff = 0;
7678 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7679 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7680}
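
The by-value residue loop above is easiest to follow on the 7-byte example from its comment: a 4-, a 2- and a 1-byte load, each shifted into place. A standalone model, assuming 8-byte GPRs (illustration only; std::bit_floor needs C++20):

#include <bit>
#include <cstdio>

int main() {
  const unsigned PtrByteSize = 8, ResidueBytes = 7;
  for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
    const unsigned N = std::bit_floor(ResidueBytes - Bytes); // 4, then 2, then 1
    Bytes += N;
    std::printf("load %u byte(s), shl %u, or into the register\n",
                N, PtrByteSize * 8 - Bytes * 8); // shifts 32, 16, 8
  }
}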
7681
7682bool
7683PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7684 MachineFunction &MF, bool isVarArg,
7685 const SmallVectorImpl<ISD::OutputArg> &Outs,
7686 LLVMContext &Context) const {
7687 SmallVector<CCValAssign, 16> RVLocs;
7688 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7689 return CCInfo.CheckReturn(
7690 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7691 ? RetCC_PPC_Cold
7692 : RetCC_PPC);
7693}
7694
7695SDValue
7696PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7697 bool isVarArg,
7698 const SmallVectorImpl<ISD::OutputArg> &Outs,
7699 const SmallVectorImpl<SDValue> &OutVals,
7700 const SDLoc &dl, SelectionDAG &DAG) const {
7701 SmallVector<CCValAssign, 16> RVLocs;
7702 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7703 *DAG.getContext());
7704 CCInfo.AnalyzeReturn(Outs,
7705 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7706 ? RetCC_PPC_Cold
7707 : RetCC_PPC);
7708
7709 SDValue Glue;
7710 SmallVector<SDValue, 4> RetOps(1, Chain);
7711
7712 // Copy the result values into the output registers.
7713 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7714 CCValAssign &VA = RVLocs[i];
7715 assert(VA.isRegLoc() && "Can only return in registers!");
7716
7717 SDValue Arg = OutVals[RealResIdx];
7718
7719 switch (VA.getLocInfo()) {
7720 default: llvm_unreachable("Unknown loc info!");
7721 case CCValAssign::Full: break;
7722 case CCValAssign::AExt:
7723 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7724 break;
7725 case CCValAssign::ZExt:
7726 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7727 break;
7728 case CCValAssign::SExt:
7729 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7730 break;
7731 }
7732 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7733 bool isLittleEndian = Subtarget.isLittleEndian();
7734 // Legalize ret f64 -> ret 2 x i32.
7735 SDValue SVal =
7736 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7737 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7738 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7739 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7740 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7741 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7742 Glue = Chain.getValue(1);
7743 VA = RVLocs[++i]; // skip ahead to next loc
7744 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7745 } else
7746 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7747 Glue = Chain.getValue(1);
7748 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7749 }
7750
7751 RetOps[0] = Chain; // Update chain.
7752
7753 // Add the glue if we have it.
7754 if (Glue.getNode())
7755 RetOps.push_back(Glue);
7756
7757 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7758}
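
The SPE branch above splits an f64 return value into two i32 register copies; which half goes out first depends on the isLittleEndian selects. A standalone sketch of the bit-level split (illustration only):

#include <cassert>
#include <cstdint>
#include <cstring>

// Split a double's bit pattern into two GPR-sized words.
static void splitF64(double D, uint32_t &Hi, uint32_t &Lo) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof Bits);
  Hi = static_cast<uint32_t>(Bits >> 32);
  Lo = static_cast<uint32_t>(Bits);
}

int main() {
  uint32_t Hi, Lo;
  splitF64(1.0, Hi, Lo); // 1.0 is 0x3FF0000000000000
  assert(Hi == 0x3FF00000u && Lo == 0u);
}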
7759
7760SDValue
7761PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7762 SelectionDAG &DAG) const {
7763 SDLoc dl(Op);
7764
7765 // Get the correct type for integers.
7766 EVT IntVT = Op.getValueType();
7767
7768 // Get the inputs.
7769 SDValue Chain = Op.getOperand(0);
7770 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7771 // Build a DYNAREAOFFSET node.
7772 SDValue Ops[2] = {Chain, FPSIdx};
7773 SDVTList VTs = DAG.getVTList(IntVT);
7774 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7775}
7776
7777SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7778 SelectionDAG &DAG) const {
7779 // When we pop the dynamic allocation we need to restore the SP link.
7780 SDLoc dl(Op);
7781
7782 // Get the correct type for pointers.
7783 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7784
7785 // Construct the stack pointer operand.
7786 bool isPPC64 = Subtarget.isPPC64();
7787 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7788 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7789
7790 // Get the operands for the STACKRESTORE.
7791 SDValue Chain = Op.getOperand(0);
7792 SDValue SaveSP = Op.getOperand(1);
7793
7794 // Load the old link SP.
7795 SDValue LoadLinkSP =
7796 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7797
7798 // Restore the stack pointer.
7799 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7800
7801 // Store the old link SP.
7802 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7803}
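
The three steps above (load the old back chain, move SP, re-store the link) can be modeled with plain pointers. A standalone sketch treating the stack as an array (illustration only):

#include <cassert>

int main() {
  void *Mem[8] = {};
  void **SP = &Mem[2];     // current stack pointer
  void **SaveSP = &Mem[5]; // value STACKRESTORE should install
  *SP = &Mem[7];           // pretend a back-chain link is stored at *SP
  void *Link = *SP;        // LoadLinkSP: read the old SP link
  SP = SaveSP;             // CopyToReg: restore the stack pointer
  *SP = Link;              // store the old link at the new SP
  assert(*SaveSP == &Mem[7]);
}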
7804
7805 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7806 MachineFunction &MF = DAG.getMachineFunction();
7807 bool isPPC64 = Subtarget.isPPC64();
7808 EVT PtrVT = getPointerTy(MF.getDataLayout());
7809
7810 // Get the current return address save index.
7811
7812 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7813 int RASI = FI->getReturnAddrSaveIndex();
7814
7815 // If the return address save index hasn't been defined yet.
7816 if (!RASI) {
7817 // Find out the fixed offset of the return address save area.
7818 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7819 // Allocate the frame index for the return address save area.
7820 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7821 // Save the result.
7822 FI->setReturnAddrSaveIndex(RASI);
7823 }
7824 return DAG.getFrameIndex(RASI, PtrVT);
7825}
7826
7827SDValue
7828 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
7829 MachineFunction &MF = DAG.getMachineFunction();
7830 bool isPPC64 = Subtarget.isPPC64();
7831 EVT PtrVT = getPointerTy(MF.getDataLayout());
7832
7833 // Get the current frame pointer save index. The users of this index will
7834 // be primarily DYNALLOC instructions.
7835 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7836 int FPSI = FI->getFramePointerSaveIndex();
7837
7838 // If the frame pointer save index hasn't been defined yet.
7839 if (!FPSI) {
7840 // Find out the fixed offset of the frame pointer save area.
7841 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7842 // Allocate the frame index for frame pointer save area.
7843 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7844 // Save the result.
7845 FI->setFramePointerSaveIndex(FPSI);
7846 }
7847 return DAG.getFrameIndex(FPSI, PtrVT);
7848}
7849
7850SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7851 SelectionDAG &DAG) const {
7852 MachineFunction &MF = DAG.getMachineFunction();
7853 // Get the inputs.
7854 SDValue Chain = Op.getOperand(0);
7855 SDValue Size = Op.getOperand(1);
7856 SDLoc dl(Op);
7857
7858 // Get the correct type for pointers.
7859 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7860 // Negate the size.
7861 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7862 DAG.getConstant(0, dl, PtrVT), Size);
7863 // Construct a node for the frame pointer save index.
7864 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7865 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7866 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7867 if (hasInlineStackProbe(MF))
7868 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7869 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7870}
7871
7872SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7873 SelectionDAG &DAG) const {
7874 MachineFunction &MF = DAG.getMachineFunction();
7875
7876 bool isPPC64 = Subtarget.isPPC64();
7877 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7878
7879 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7880 return DAG.getFrameIndex(FI, PtrVT);
7881}
7882
7883SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7884 SelectionDAG &DAG) const {
7885 SDLoc DL(Op);
7886 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7887 DAG.getVTList(MVT::i32, MVT::Other),
7888 Op.getOperand(0), Op.getOperand(1));
7889}
7890
7891SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7892 SelectionDAG &DAG) const {
7893 SDLoc DL(Op);
7894 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7895 Op.getOperand(0), Op.getOperand(1));
7896}
7897
7898SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7899 if (Op.getValueType().isVector())
7900 return LowerVectorLoad(Op, DAG);
7901
7902 assert(Op.getValueType() == MVT::i1 &&
7903 "Custom lowering only for i1 loads");
7904
7905 // First, load 8 bits into a register-width integer, then truncate to 1 bit.
7906
7907 SDLoc dl(Op);
7908 LoadSDNode *LD = cast<LoadSDNode>(Op);
7909
7910 SDValue Chain = LD->getChain();
7911 SDValue BasePtr = LD->getBasePtr();
7912 MachineMemOperand *MMO = LD->getMemOperand();
7913
7914 SDValue NewLD =
7915 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7916 BasePtr, MVT::i8, MMO);
7917 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7918
7919 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7920 return DAG.getMergeValues(Ops, dl);
7921}
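
The lowering above widens the load and narrows afterwards. A scalar model of the same idea, assuming the in-memory i1 occupies one byte (illustration only):

#include <cstdint>

// Load a byte with an extending load, then "truncate" to the low bit,
// mirroring the EXTLOAD(i8) + TRUNCATE(i1) pair above.
bool loadI1(const std::uint8_t *P) {
  std::uint64_t Wide = *P; // extend to register width
  return (Wide & 1) != 0;  // truncation to i1 keeps bit 0
}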
7922
7923SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7924 if (Op.getOperand(1).getValueType().isVector())
7925 return LowerVectorStore(Op, DAG);
7926
7927 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7928 "Custom lowering only for i1 stores");
7929
7930 // First, zero extend to register width, then use a truncating store to 8 bits.
7931
7932 SDLoc dl(Op);
7933 StoreSDNode *ST = cast<StoreSDNode>(Op);
7934
7935 SDValue Chain = ST->getChain();
7936 SDValue BasePtr = ST->getBasePtr();
7937 SDValue Value = ST->getValue();
7938 MachineMemOperand *MMO = ST->getMemOperand();
7939
7940 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7941 Value);
7942 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7943}
7944
7945// FIXME: Remove this once the ANDI glue bug is fixed:
7946SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7947 assert(Op.getValueType() == MVT::i1 &&
7948 "Custom lowering only for i1 results");
7949
7950 SDLoc DL(Op);
7951 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7952}
7953
7954SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7955 SelectionDAG &DAG) const {
7956
7957 // Implements a vector truncate that fits in a vector register as a shuffle.
7958 // We want to legalize vector truncates down to where the source fits in
7959 // a vector register (and target is therefore smaller than vector register
7960 // size). At that point legalization will try to custom lower the sub-legal
7961 // result and get here - where we can contain the truncate as a single target
7962 // operation.
7963
7964 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7965 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7966 //
7967 // We will implement it for big-endian ordering as this (where x denotes
7968 // undefined):
7969 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7970 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7971 //
7972 // The same operation in little-endian ordering will be:
7973 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7974 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7975
7976 EVT TrgVT = Op.getValueType();
7977 assert(TrgVT.isVector() && "Vector type expected.");
7978 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7979 EVT EltVT = TrgVT.getVectorElementType();
7980 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7981 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7982 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
7983 return SDValue();
7984
7985 SDValue N1 = Op.getOperand(0);
7986 EVT SrcVT = N1.getValueType();
7987 unsigned SrcSize = SrcVT.getSizeInBits();
7988 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7989 !llvm::has_single_bit<uint32_t>(
7990 SrcVT.getVectorElementType().getSizeInBits()))
7991 return SDValue();
7992 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7993 return SDValue();
7994
7995 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7996 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7997
7998 SDLoc DL(Op);
7999 SDValue Op1, Op2;
8000 if (SrcSize == 256) {
8001 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8002 EVT SplitVT =
8003 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8004 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8005 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8006 DAG.getConstant(0, DL, VecIdxTy));
8007 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8008 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8009 }
8010 else {
8011 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8012 Op2 = DAG.getUNDEF(WideVT);
8013 }
8014
8015 // First list the elements we want to keep.
8016 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8017 SmallVector<int, 16> ShuffV;
8018 if (Subtarget.isLittleEndian())
8019 for (unsigned i = 0; i < TrgNumElts; ++i)
8020 ShuffV.push_back(i * SizeMult);
8021 else
8022 for (unsigned i = 1; i <= TrgNumElts; ++i)
8023 ShuffV.push_back(i * SizeMult - 1);
8024
8025 // Populate the remaining elements with undefs.
8026 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8028 ShuffV.push_back(WideNumElts + 1);
8029
8030 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8031 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8032 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8033}
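
For the <2 x i16> to <2 x i8> example in the comment above, TrgNumElts = 2, WideNumElts = 16 and SizeMult = 2, so the mask keeps bytes 0 and 2 on little-endian (1 and 3 on big-endian) and fills the rest with don't-care lanes. A standalone reconstruction of that mask (illustration only):

#include <cstdio>

int main() {
  const unsigned TrgNumElts = 2, WideNumElts = 16, SizeMult = 2;
  const bool IsLittleEndian = true;
  for (unsigned i = 0; i < TrgNumElts; ++i)
    std::printf("%u ", IsLittleEndian ? i * SizeMult : (i + 1) * SizeMult - 1);
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    std::printf("%u ", WideNumElts + 1); // out-of-range index: undef lane
  std::printf("\n"); // little-endian mask: 0 2 17 17 ... 17
}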
8034
8035 /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
8036 /// instruction when possible.
8037SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8038 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8039 EVT ResVT = Op.getValueType();
8040 EVT CmpVT = Op.getOperand(0).getValueType();
8041 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8042 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8043 SDLoc dl(Op);
8044
8045 // Without power9-vector, we don't have a native instruction for f128
8046 // comparison. The following transformation to a libcall is needed for setcc:
8047 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8048 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8049 SDValue Z = DAG.getSetCC(
8050 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8051 LHS, RHS, CC);
8052 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8053 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8054 }
8055
8056 // Not FP, or using SPE? Not a fsel.
8057 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8058 Subtarget.hasSPE())
8059 return Op;
8060
8061 SDNodeFlags Flags = Op.getNode()->getFlags();
8062
8063 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8064 // presence of infinities.
8065 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8066 switch (CC) {
8067 default:
8068 break;
8069 case ISD::SETOGT:
8070 case ISD::SETGT:
8071 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8072 case ISD::SETOLT:
8073 case ISD::SETLT:
8074 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8075 }
8076 }
8077
8078 // We might be able to do better than this under some circumstances, but in
8079 // general, fsel-based lowering of select is a finite-math-only optimization.
8080 // For more information, see section F.3 of the 2.06 ISA specification.
8082 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8083 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8084 ResVT == MVT::f128)
8085 return Op;
8086
8087 // If the RHS of the comparison is a 0.0, we don't need to do the
8088 // subtraction at all.
8089 SDValue Sel1;
8090 if (isFloatingPointZero(RHS))
8091 switch (CC) {
8092 default: break; // SETUO etc aren't handled by fsel.
8093 case ISD::SETNE:
8094 std::swap(TV, FV);
8095 [[fallthrough]];
8096 case ISD::SETEQ:
8097 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8098 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8099 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8100 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8101 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8102 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8103 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8104 case ISD::SETULT:
8105 case ISD::SETLT:
8106 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8107 [[fallthrough]];
8108 case ISD::SETOGE:
8109 case ISD::SETGE:
8110 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8111 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8112 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8113 case ISD::SETUGT:
8114 case ISD::SETGT:
8115 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
8116 [[fallthrough]];
8117 case ISD::SETOLE:
8118 case ISD::SETLE:
8119 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8120 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8121 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8122 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8123 }
8124
8125 SDValue Cmp;
8126 switch (CC) {
8127 default: break; // SETUO etc aren't handled by fsel.
8128 case ISD::SETNE:
8129 std::swap(TV, FV);
8130 [[fallthrough]];
8131 case ISD::SETEQ:
8132 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8133 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8134 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8135 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8136 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8137 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8138 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8139 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8140 case ISD::SETULT:
8141 case ISD::SETLT:
8142 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8143 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8144 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8145 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8146 case ISD::SETOGE:
8147 case ISD::SETGE:
8148 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8149 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8150 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8151 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8152 case ISD::SETUGT:
8153 case ISD::SETGT:
8154 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8155 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8156 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8157 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8158 case ISD::SETOLE:
8159 case ISD::SETLE:
8160 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8161 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8162 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8163 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8164 }
8165 return Op;
8166}
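// Worked example (illustrative, not part of the upstream source): for
// select_cc x, y, t, f, SETOLT the code above emits
//   cmp    = fsub x, y         ; cmp < 0 exactly when x < y
//   result = fsel cmp, f, t    ; fsel selects its second operand when cmp >= 0
// so t is chosen precisely when x < y; the earlier NaN/infinity bail-outs
// guarantee the subtraction cannot misclassify the comparison.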
8167
8168static unsigned getPPCStrictOpcode(unsigned Opc) {
8169 switch (Opc) {
8170 default:
8171 llvm_unreachable("No strict version of this opcode!");
8172 case PPCISD::FCTIDZ:
8173 return PPCISD::STRICT_FCTIDZ;
8174 case PPCISD::FCTIWZ:
8175 return PPCISD::STRICT_FCTIWZ;
8176 case PPCISD::FCTIDUZ:
8177 return PPCISD::STRICT_FCTIDUZ;
8178 case PPCISD::FCTIWUZ:
8179 return PPCISD::STRICT_FCTIWUZ;
8180 case PPCISD::FCFID:
8181 return PPCISD::STRICT_FCFID;
8182 case PPCISD::FCFIDU:
8183 return PPCISD::STRICT_FCFIDU;
8184 case PPCISD::FCFIDS:
8185 return PPCISD::STRICT_FCFIDS;
8186 case PPCISD::FCFIDUS:
8187 return PPCISD::STRICT_FCFIDUS;
8188 }
8189}
8190
8191 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8192 const PPCSubtarget &Subtarget) {
8193 SDLoc dl(Op);
8194 bool IsStrict = Op->isStrictFPOpcode();
8195 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8196 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8197
8198 // TODO: Any other flags to propagate?
8199 SDNodeFlags Flags;
8200 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8201
8202 // For strict nodes, source is the second operand.
8203 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8204 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8205 MVT DestTy = Op.getSimpleValueType();
8206 assert(Src.getValueType().isFloatingPoint() &&
8207 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8208 DestTy == MVT::i64) &&
8209 "Invalid FP_TO_INT types");
8210 if (Src.getValueType() == MVT::f32) {
8211 if (IsStrict) {
8212 Src =
8213 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8214 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8215 Chain = Src.getValue(1);
8216 } else
8217 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8218 }
8219 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8220 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8221 unsigned Opc = ISD::DELETED_NODE;
8222 switch (DestTy.SimpleTy) {
8223 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8224 case MVT::i32:
8225 Opc = IsSigned ? PPCISD::FCTIWZ
8226 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8227 break;
8228 case MVT::i64:
8229 assert((IsSigned || Subtarget.hasFPCVT()) &&
8230 "i64 FP_TO_UINT is supported only with FPCVT");
8231 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8232 }
8233 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8234 SDValue Conv;
8235 if (IsStrict) {
8236 Opc = getPPCStrictOpcode(Opc);
8237 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8238 Flags);
8239 } else {
8240 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8241 }
8242 return Conv;
8243}
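// Example (illustrative): a non-strict FP_TO_SINT from f32 to i32 first
// extends the source to f64, then converts with FCTIWZ (round toward zero).
// The integer result is produced in a floating-point register, so callers
// still need a direct move (LowerFP_TO_INTDirectMove) or a store/load pair
// (LowerFP_TO_INTForReuse) to materialize it in a GPR.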
8244
8245void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8246 SelectionDAG &DAG,
8247 const SDLoc &dl) const {
8248 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8249 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8250 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8251 bool IsStrict = Op->isStrictFPOpcode();
8252
8253 // Convert the FP value to an int value through memory.
8254 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8255 (IsSigned || Subtarget.hasFPCVT());
8256 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8257 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8258 MachinePointerInfo MPI =
8259 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8260
8261 // Emit a store to the stack slot.
8262 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8263 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8264 if (i32Stack) {
8265 MachineFunction &MF = DAG.getMachineFunction();
8266 Alignment = Align(4);
8267 MachineMemOperand *MMO =
8268 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8269 SDValue Ops[] = { Chain, Tmp, FIPtr };
8270 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8271 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8272 } else
8273 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8274
8275 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8276 // add in a bias on big endian.
8277 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8278 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8279 DAG.getConstant(4, dl, FIPtr.getValueType()));
8280 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8281 }
8282
8283 RLI.Chain = Chain;
8284 RLI.Ptr = FIPtr;
8285 RLI.MPI = MPI;
8286 RLI.Alignment = Alignment;
8287}
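// Note on the bias above (illustrative): when the f64 conversion result is
// stored as a full 8-byte slot, the 32-bit integer payload occupies the
// low-order word, which a big-endian layout places at byte offset 4; the
// STFIWX path sidesteps this by storing only that word.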
8288
8289/// Custom lowers floating point to integer conversions to use
8290/// the direct move instructions available in ISA 2.07 to avoid the
8291/// need for load/store combinations.
8292SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8293 SelectionDAG &DAG,
8294 const SDLoc &dl) const {
8295 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8296 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8297 if (Op->isStrictFPOpcode())
8298 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8299 else
8300 return Mov;
8301}
8302
8303SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8304 const SDLoc &dl) const {
8305 bool IsStrict = Op->isStrictFPOpcode();
8306 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8307 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8308 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8309 EVT SrcVT = Src.getValueType();
8310 EVT DstVT = Op.getValueType();
8311
8312 // FP to INT conversions are legal for f128.
8313 if (SrcVT == MVT::f128)
8314 return Subtarget.hasP9Vector() ? Op : SDValue();
8315
8316 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8317 // PPC (the libcall is not available).
8318 if (SrcVT == MVT::ppcf128) {
8319 if (DstVT == MVT::i32) {
8320 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8321 // set other fast-math flags to FP operations in both strict and
8322 // non-strict cases. (FP_TO_SINT, FSUB)
8323 SDNodeFlags Flags;
8324 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8325
8326 if (IsSigned) {
8327 SDValue Lo, Hi;
8328 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8329
8330 // Add the two halves of the long double in round-to-zero mode, and use
8331 // a smaller FP_TO_SINT.
8332 if (IsStrict) {
8333 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8334 DAG.getVTList(MVT::f64, MVT::Other),
8335 {Op.getOperand(0), Lo, Hi}, Flags);
8336 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8337 DAG.getVTList(MVT::i32, MVT::Other),
8338 {Res.getValue(1), Res}, Flags);
8339 } else {
8340 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8341 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8342 }
8343 } else {
8344 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8345 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8346 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8347 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8348 if (IsStrict) {
8349 // Sel = Src < 0x80000000
8350 // FltOfs = select Sel, 0.0, 0x80000000
8351 // IntOfs = select Sel, 0, 0x80000000
8352 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8353 SDValue Chain = Op.getOperand(0);
8354 EVT SetCCVT =
8355 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8356 EVT DstSetCCVT =
8357 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8358 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8359 Chain, true);
8360 Chain = Sel.getValue(1);
8361
8362 SDValue FltOfs = DAG.getSelect(
8363 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8364 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8365
8366 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8367 DAG.getVTList(SrcVT, MVT::Other),
8368 {Chain, Src, FltOfs}, Flags);
8369 Chain = Val.getValue(1);
8370 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8371 DAG.getVTList(DstVT, MVT::Other),
8372 {Chain, Val}, Flags);
8373 Chain = SInt.getValue(1);
8374 SDValue IntOfs = DAG.getSelect(
8375 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8376 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8377 return DAG.getMergeValues({Result, Chain}, dl);
8378 } else {
8379 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8380 // FIXME: generated code sucks.
8381 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8382 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8383 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8384 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8385 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8386 }
8387 }
8388 }
8389
8390 return SDValue();
8391 }
8392
8393 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8394 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8395
8396 ReuseLoadInfo RLI;
8397 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8398
8399 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8400 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8401}
8402
8403// We're trying to insert a regular store, S, and then a load, L. If the
8404// incoming value, O, is a load, we might just be able to have our load use the
8405// address used by O. However, we don't know if anything else will store to
8406// that address before we can load from it. To prevent this situation, we need
8407// to insert our load, L, into the chain as a peer of O. To do this, we give L
8408// the same chain operand as O, we create a token factor from the chain results
8409// of O and L, and we replace all uses of O's chain result with that token
8410// factor (see spliceIntoChain below for this last part).
8411bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8412 ReuseLoadInfo &RLI,
8413 SelectionDAG &DAG,
8414 ISD::LoadExtType ET) const {
8415 // Conservatively skip reusing for constrained FP nodes.
8416 if (Op->isStrictFPOpcode())
8417 return false;
8418
8419 SDLoc dl(Op);
8420 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8421 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8422 if (ET == ISD::NON_EXTLOAD &&
8423 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8424 isOperationLegalOrCustom(Op.getOpcode(),
8425 Op.getOperand(0).getValueType())) {
8426
8427 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8428 return true;
8429 }
8430
8431 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8432 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8433 LD->isNonTemporal())
8434 return false;
8435 if (LD->getMemoryVT() != MemVT)
8436 return false;
8437
8438 // If the result of the load is an illegal type, then we can't build a
8439 // valid chain for reuse since the legalised loads and token factor node that
8440 // ties the legalised loads together uses a different output chain than the
8441 // illegal load.
8442 if (!isTypeLegal(LD->getValueType(0)))
8443 return false;
8444
8445 RLI.Ptr = LD->getBasePtr();
8446 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8447 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8448 "Non-pre-inc AM on PPC?");
8449 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8450 LD->getOffset());
8451 }
8452
8453 RLI.Chain = LD->getChain();
8454 RLI.MPI = LD->getPointerInfo();
8455 RLI.IsDereferenceable = LD->isDereferenceable();
8456 RLI.IsInvariant = LD->isInvariant();
8457 RLI.Alignment = LD->getAlign();
8458 RLI.AAInfo = LD->getAAInfo();
8459 RLI.Ranges = LD->getRanges();
8460
8461 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8462 return true;
8463}
8464
8465// Given the head of the old chain, ResChain, insert a token factor containing
8466// it and NewResChain, and make users of ResChain now be users of that token
8467// factor.
8468// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8469void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8470 SDValue NewResChain,
8471 SelectionDAG &DAG) const {
8472 if (!ResChain)
8473 return;
8474
8475 SDLoc dl(NewResChain);
8476
8477 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8478 NewResChain, DAG.getUNDEF(MVT::Other));
8479 assert(TF.getNode() != NewResChain.getNode() &&
8480 "A new TF really is required here");
8481
8482 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8483 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8484}
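// Sketch of the splice (illustrative): if O's chain result was used by U1 and
// U2, then after spliceIntoChain(O.chain, L.chain) we have
//   TF = TokenFactor(O.chain, L.chain), with U1 and U2 now using TF,
// while L keeps O's chain operand, ordering L as a peer of O so that no
// intervening store can be scheduled between them.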
8485
8486 /// Analyze the profitability of a direct move:
8487 /// prefer a float load to an int load plus a direct move
8488 /// when there is no integer use of the int load.
8489bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8490 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8491 if (Origin->getOpcode() != ISD::LOAD)
8492 return true;
8493
8494 // If there is no LXSIBZX/LXSIHZX, like Power8,
8495 // prefer direct move if the memory size is 1 or 2 bytes.
8496 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8497 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8498 return true;
8499
8500 for (SDNode::use_iterator UI = Origin->use_begin(),
8501 UE = Origin->use_end();
8502 UI != UE; ++UI) {
8503
8504 // Only look at the users of the loaded value.
8505 if (UI.getUse().get().getResNo() != 0)
8506 continue;
8507
8508 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8509 UI->getOpcode() != ISD::UINT_TO_FP &&
8510 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8511 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8512 return true;
8513 }
8514
8515 return false;
8516}
8517
8518 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8519 const PPCSubtarget &Subtarget,
8520 SDValue Chain = SDValue()) {
8521 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8522 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8523 SDLoc dl(Op);
8524
8525 // TODO: Any other flags to propagate?
8526 SDNodeFlags Flags;
8527 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8528
8529 // If we have FCFIDS, then use it when converting to single-precision.
8530 // Otherwise, convert to double-precision and then round.
8531 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8532 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8533 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8534 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8535 if (Op->isStrictFPOpcode()) {
8536 if (!Chain)
8537 Chain = Op.getOperand(0);
8538 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8539 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8540 } else
8541 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8542}
8543
8544/// Custom lowers integer to floating point conversions to use
8545/// the direct move instructions available in ISA 2.07 to avoid the
8546/// need for load/store combinations.
8547SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8548 SelectionDAG &DAG,
8549 const SDLoc &dl) const {
8550 assert((Op.getValueType() == MVT::f32 ||
8551 Op.getValueType() == MVT::f64) &&
8552 "Invalid floating point type as target of conversion");
8553 assert(Subtarget.hasFPCVT() &&
8554 "Int to FP conversions with direct moves require FPCVT");
8555 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8556 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8557 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8558 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8559 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8560 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8561 return convertIntToFP(Op, Mov, DAG, Subtarget);
8562}
8563
8564static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8565
8566 EVT VecVT = Vec.getValueType();
8567 assert(VecVT.isVector() && "Expected a vector type.");
8568 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8569
8570 EVT EltVT = VecVT.getVectorElementType();
8571 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8572 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8573
8574 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8575 SmallVector<SDValue, 16> Ops(NumConcat);
8576 Ops[0] = Vec;
8577 SDValue UndefVec = DAG.getUNDEF(VecVT);
8578 for (unsigned i = 1; i < NumConcat; ++i)
8579 Ops[i] = UndefVec;
8580
8581 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8582}
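// Example (illustrative): widening a v2i32 value V yields
//   (v4i32 concat_vectors V, undef)
// i.e. V occupies the low-numbered elements and the upper half is undef.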
8583
8584SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8585 const SDLoc &dl) const {
8586 bool IsStrict = Op->isStrictFPOpcode();
8587 unsigned Opc = Op.getOpcode();
8588 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8589 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8590 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8591 "Unexpected conversion type");
8592 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8593 "Supports conversions to v2f64/v4f32 only.");
8594
8595 // TODO: Any other flags to propagate?
8596 SDNodeFlags Flags;
8597 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8598
8599 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8600 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8601
8602 SDValue Wide = widenVec(DAG, Src, dl);
8603 EVT WideVT = Wide.getValueType();
8604 unsigned WideNumElts = WideVT.getVectorNumElements();
8605 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8606
8607 SmallVector<int, 16> ShuffV;
8608 for (unsigned i = 0; i < WideNumElts; ++i)
8609 ShuffV.push_back(i + WideNumElts);
8610
8611 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8612 int SaveElts = FourEltRes ? 4 : 2;
8613 if (Subtarget.isLittleEndian())
8614 for (int i = 0; i < SaveElts; i++)
8615 ShuffV[i * Stride] = i;
8616 else
8617 for (int i = 1; i <= SaveElts; i++)
8618 ShuffV[i * Stride - 1] = i - 1;
8619
8620 SDValue ShuffleSrc2 =
8621 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8622 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8623
8624 SDValue Extend;
8625 if (SignedConv) {
8626 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8627 EVT ExtVT = Src.getValueType();
8628 if (Subtarget.hasP9Altivec())
8629 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8630 IntermediateVT.getVectorNumElements());
8631
8632 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8633 DAG.getValueType(ExtVT));
8634 } else
8635 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8636
8637 if (IsStrict)
8638 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8639 {Op.getOperand(0), Extend}, Flags);
8640
8641 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8642}
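// Worked example (illustrative): UINT_TO_FP v4i16 -> v4f32 on little-endian.
// Wide is v8i16 with the source in elements 0..3, WideNumElts = 8, Stride = 2,
// and ShuffV starts as <8,9,10,11,12,13,14,15> (all zeros from ShuffleSrc2),
// then becomes <0,9,1,11,2,13,3,15>: each source i16 interleaved with a zero.
// Bitcast to v4i32, that is exactly the zero-extension of the four lanes that
// the final UINT_TO_FP consumes.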
8643
8644SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8645 SelectionDAG &DAG) const {
8646 SDLoc dl(Op);
8647 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8648 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8649 bool IsStrict = Op->isStrictFPOpcode();
8650 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8651 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8652
8653 // TODO: Any other flags to propagate?
8654 SDNodeFlags Flags;
8655 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8656
8657 EVT InVT = Src.getValueType();
8658 EVT OutVT = Op.getValueType();
8659 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8660 isOperationCustom(Op.getOpcode(), InVT))
8661 return LowerINT_TO_FPVector(Op, DAG, dl);
8662
8663 // Conversions to f128 are legal.
8664 if (Op.getValueType() == MVT::f128)
8665 return Subtarget.hasP9Vector() ? Op : SDValue();
8666
8667 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8668 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8669 return SDValue();
8670
8671 if (Src.getValueType() == MVT::i1) {
8672 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8673 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8674 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8675 if (IsStrict)
8676 return DAG.getMergeValues({Sel, Chain}, dl);
8677 else
8678 return Sel;
8679 }
8680
8681 // If we have direct moves, we can do all the conversion and skip the store/load;
8682 // however, without FPCVT we can't do most conversions.
8683 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8684 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8685 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8686
8687 assert((IsSigned || Subtarget.hasFPCVT()) &&
8688 "UINT_TO_FP is supported only with FPCVT");
8689
8690 if (Src.getValueType() == MVT::i64) {
8691 SDValue SINT = Src;
8692 // When converting to single-precision, we actually need to convert
8693 // to double-precision first and then round to single-precision.
8694 // To avoid double-rounding effects during that operation, we have
8695 // to prepare the input operand. Bits that might be truncated when
8696 // converting to double-precision are replaced by a bit that won't
8697 // be lost at this stage, but is below the single-precision rounding
8698 // position.
8699 //
8700 // However, if -enable-unsafe-fp-math is in effect, accept double
8701 // rounding to avoid the extra overhead.
8702 if (Op.getValueType() == MVT::f32 &&
8703 !Subtarget.hasFPCVT() &&
8704 !DAG.getTarget().Options.UnsafeFPMath) {
8705
8706 // Twiddle input to make sure the low 11 bits are zero. (If this
8707 // is the case, we are guaranteed the value will fit into the 53 bit
8708 // mantissa of an IEEE double-precision value without rounding.)
8709 // If any of those low 11 bits were not zero originally, make sure
8710 // bit 12 (value 2048) is set instead, so that the final rounding
8711 // to single-precision gets the correct result.
8712 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8713 SINT, DAG.getConstant(2047, dl, MVT::i64));
8714 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8715 Round, DAG.getConstant(2047, dl, MVT::i64));
8716 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8717 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8718 Round, DAG.getConstant(-2048, dl, MVT::i64));
8719
8720 // However, we cannot use that value unconditionally: if the magnitude
8721 // of the input value is small, the bit-twiddling we did above might
8722 // end up visibly changing the output. Fortunately, in that case, we
8723 // don't need to twiddle bits since the original input will convert
8724 // exactly to double-precision floating-point already. Therefore,
8725 // construct a conditional to use the original value if the top 11
8726 // bits are all sign-bit copies, and use the rounded value computed
8727 // above otherwise.
8728 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8729 SINT, DAG.getConstant(53, dl, MVT::i32));
8730 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8731 Cond, DAG.getConstant(1, dl, MVT::i64));
8732 Cond = DAG.getSetCC(
8733 dl,
8734 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8735 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8736
8737 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8738 }
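// Worked example of the twiddling above (illustrative): if SINT has
// (SINT & 0x7FF) == 0x401, then
//   Round = 0x401 + 2047 = 0xC00  (bit 11 set because the low bits are nonzero)
//   Round = (Round | SINT) & ~2047 -> SINT with its low 11 bits replaced by 0x800
// leaving a single sticky bit below the single-precision rounding position, so
// the f64 result rounds to f32 exactly as the original i64 would have.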
8739
8740 ReuseLoadInfo RLI;
8741 SDValue Bits;
8742
8743 MachineFunction &MF = DAG.getMachineFunction();
8744 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8745 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8746 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8747 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8748 } else if (Subtarget.hasLFIWAX() &&
8749 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8750 MachineMemOperand *MMO =
8751 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8752 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8753 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8754 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8755 DAG.getVTList(MVT::f64, MVT::Other),
8756 Ops, MVT::i32, MMO);
8757 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8758 } else if (Subtarget.hasFPCVT() &&
8759 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8760 MachineMemOperand *MMO =
8761 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8762 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8763 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8764 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8765 DAG.getVTList(MVT::f64, MVT::Other),
8766 Ops, MVT::i32, MMO);
8767 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8768 } else if (((Subtarget.hasLFIWAX() &&
8769 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8770 (Subtarget.hasFPCVT() &&
8771 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8772 SINT.getOperand(0).getValueType() == MVT::i32) {
8773 MachineFrameInfo &MFI = MF.getFrameInfo();
8774 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8775
8776 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8777 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8778
8779 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8780 MachinePointerInfo::getFixedStack(
8781 DAG.getMachineFunction(), FrameIdx));
8782 Chain = Store;
8783
8784 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8785 "Expected an i32 store");
8786
8787 RLI.Ptr = FIdx;
8788 RLI.Chain = Chain;
8789 RLI.MPI =
8790 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8791 RLI.Alignment = Align(4);
8792
8793 MachineMemOperand *MMO =
8794 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8795 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8796 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8797 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8798 PPCISD::LFIWZX : PPCISD::LFIWAX,
8799 dl, DAG.getVTList(MVT::f64, MVT::Other),
8800 Ops, MVT::i32, MMO);
8801 Chain = Bits.getValue(1);
8802 } else
8803 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8804
8805 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8806 if (IsStrict)
8807 Chain = FP.getValue(1);
8808
8809 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8810 if (IsStrict)
8811 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8812 DAG.getVTList(MVT::f32, MVT::Other),
8813 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8814 else
8815 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8816 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8817 }
8818 return FP;
8819 }
8820
8821 assert(Src.getValueType() == MVT::i32 &&
8822 "Unhandled INT_TO_FP type in custom expander!");
8823 // Since we only generate this in 64-bit mode, we can take advantage of
8824 // 64-bit registers. In particular, sign extend the input value into the
8825 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8826 // then lfd it and fcfid it.
8827 MachineFunction &MF = DAG.getMachineFunction();
8828 MachineFrameInfo &MFI = MF.getFrameInfo();
8829 EVT PtrVT = getPointerTy(MF.getDataLayout());
8830
8831 SDValue Ld;
8832 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8833 ReuseLoadInfo RLI;
8834 bool ReusingLoad;
8835 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8836 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8837 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8838
8839 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8840 MachinePointerInfo::getFixedStack(
8841 DAG.getMachineFunction(), FrameIdx));
8842 Chain = Store;
8843
8844 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8845 "Expected an i32 store");
8846
8847 RLI.Ptr = FIdx;
8848 RLI.Chain = Chain;
8849 RLI.MPI =
8850 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8851 RLI.Alignment = Align(4);
8852 }
8853
8854 MachineMemOperand *MMO =
8855 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8856 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8857 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8858 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8859 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8860 MVT::i32, MMO);
8861 Chain = Ld.getValue(1);
8862 if (ReusingLoad)
8863 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8864 } else {
8865 assert(Subtarget.isPPC64() &&
8866 "i32->FP without LFIWAX supported only on PPC64");
8867
8868 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8869 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8870
8871 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8872
8873 // STD the extended value into the stack slot.
8874 SDValue Store = DAG.getStore(
8875 Chain, dl, Ext64, FIdx,
8876 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8877 Chain = Store;
8878
8879 // Load the value as a double.
8880 Ld = DAG.getLoad(
8881 MVT::f64, dl, Chain, FIdx,
8882 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8883 Chain = Ld.getValue(1);
8884 }
8885
8886 // FCFID it and return it.
8887 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8888 if (IsStrict)
8889 Chain = FP.getValue(1);
8890 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8891 if (IsStrict)
8892 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8893 DAG.getVTList(MVT::f32, MVT::Other),
8894 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8895 else
8896 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8897 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8898 }
8899 return FP;
8900}
8901
8902SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
8903 SelectionDAG &DAG) const {
8904 SDLoc dl(Op);
8905 /*
8906 The rounding mode is in bits 30:31 of the FPSCR, and has the following
8907 settings:
8908 00 Round to nearest
8909 01 Round to 0
8910 10 Round to +inf
8911 11 Round to -inf
8912
8913 GET_ROUNDING, on the other hand, expects the following:
8914 -1 Undefined
8915 0 Round to 0
8916 1 Round to nearest
8917 2 Round to +inf
8918 3 Round to -inf
8919
8920 To perform the conversion, we do:
8921 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8922 */
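/*
 Verification of the conversion formula (illustrative):
 RN = 00 (nearest): (0 ^ (3 >> 1)) = 1
 RN = 01 (to zero): (1 ^ (2 >> 1)) = 0
 RN = 10 (to +inf): (2 ^ (1 >> 1)) = 2
 RN = 11 (to -inf): (3 ^ (0 >> 1)) = 3
 matching the GET_ROUNDING encoding in all four cases.
*/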
8923
8924 MachineFunction &MF = DAG.getMachineFunction();
8925 EVT VT = Op.getValueType();
8926 EVT PtrVT = getPointerTy(MF.getDataLayout());
8927
8928 // Save FP Control Word to register
8929 SDValue Chain = Op.getOperand(0);
8930 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8931 Chain = MFFS.getValue(1);
8932
8933 SDValue CWD;
8934 if (isTypeLegal(MVT::i64)) {
8935 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8936 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8937 } else {
8938 // Save FP register to stack slot
8939 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8940 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8941 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8942
8943 // Load FP Control Word from low 32 bits of stack slot.
8945 "Stack slot adjustment is valid only on big endian subtargets!");
8946 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8947 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8948 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8949 Chain = CWD.getValue(1);
8950 }
8951
8952 // Transform as necessary
8953 SDValue CWD1 =
8954 DAG.getNode(ISD::AND, dl, MVT::i32,
8955 CWD, DAG.getConstant(3, dl, MVT::i32));
8956 SDValue CWD2 =
8957 DAG.getNode(ISD::SRL, dl, MVT::i32,
8958 DAG.getNode(ISD::AND, dl, MVT::i32,
8959 DAG.getNode(ISD::XOR, dl, MVT::i32,
8960 CWD, DAG.getConstant(3, dl, MVT::i32)),
8961 DAG.getConstant(3, dl, MVT::i32)),
8962 DAG.getConstant(1, dl, MVT::i32));
8963
8964 SDValue RetVal =
8965 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8966
8967 RetVal =
8968 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8969 dl, VT, RetVal);
8970
8971 return DAG.getMergeValues({RetVal, Chain}, dl);
8972}
8973
8974SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8975 EVT VT = Op.getValueType();
8976 unsigned BitWidth = VT.getSizeInBits();
8977 SDLoc dl(Op);
8978 assert(Op.getNumOperands() == 3 &&
8979 VT == Op.getOperand(1).getValueType() &&
8980 "Unexpected SHL!");
8981
8982 // Expand into a bunch of logical ops. Note that these ops
8983 // depend on the PPC behavior for oversized shift amounts.
8984 SDValue Lo = Op.getOperand(0);
8985 SDValue Hi = Op.getOperand(1);
8986 SDValue Amt = Op.getOperand(2);
8987 EVT AmtVT = Amt.getValueType();
8988
8989 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8990 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8991 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8992 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8993 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8994 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8995 DAG.getConstant(-BitWidth, dl, AmtVT));
8996 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8997 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8998 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8999 SDValue OutOps[] = { OutLo, OutHi };
9000 return DAG.getMergeValues(OutOps, dl);
9001}
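// Worked example (illustrative) with BitWidth = 32 and Amt = 40, an oversized
// shift: OutLo = Lo << 40 = 0 and Tmp2 = Hi << 40 = 0, since PPC shifts yield
// 0 for amounts in [32, 63]; Tmp3 = Lo >> (32 - 40) is likewise 0, while
// Tmp6 = Lo << (40 - 32) = Lo << 8, so OutHi = Lo << 8 -- exactly the 64-bit
// result of shifting the Hi:Lo pair left by 40.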
9002
9003SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9004 EVT VT = Op.getValueType();
9005 SDLoc dl(Op);
9006 unsigned BitWidth = VT.getSizeInBits();
9007 assert(Op.getNumOperands() == 3 &&
9008 VT == Op.getOperand(1).getValueType() &&
9009 "Unexpected SRL!");
9010
9011 // Expand into a bunch of logical ops. Note that these ops
9012 // depend on the PPC behavior for oversized shift amounts.
9013 SDValue Lo = Op.getOperand(0);
9014 SDValue Hi = Op.getOperand(1);
9015 SDValue Amt = Op.getOperand(2);
9016 EVT AmtVT = Amt.getValueType();
9017
9018 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9019 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9020 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9021 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9022 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9023 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9024 DAG.getConstant(-BitWidth, dl, AmtVT));
9025 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9026 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9027 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9028 SDValue OutOps[] = { OutLo, OutHi };
9029 return DAG.getMergeValues(OutOps, dl);
9030}
9031
9032SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9033 SDLoc dl(Op);
9034 EVT VT = Op.getValueType();
9035 unsigned BitWidth = VT.getSizeInBits();
9036 assert(Op.getNumOperands() == 3 &&
9037 VT == Op.getOperand(1).getValueType() &&
9038 "Unexpected SRA!");
9039
9040 // Expand into a bunch of logical ops, followed by a select_cc.
9041 SDValue Lo = Op.getOperand(0);
9042 SDValue Hi = Op.getOperand(1);
9043 SDValue Amt = Op.getOperand(2);
9044 EVT AmtVT = Amt.getValueType();
9045
9046 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9047 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9048 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9049 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9050 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9051 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9052 DAG.getConstant(-BitWidth, dl, AmtVT));
9053 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9054 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9055 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9056 Tmp4, Tmp6, ISD::SETLE);
9057 SDValue OutOps[] = { OutLo, OutHi };
9058 return DAG.getMergeValues(OutOps, dl);
9059}
9060
9061SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9062 SelectionDAG &DAG) const {
9063 SDLoc dl(Op);
9064 EVT VT = Op.getValueType();
9065 unsigned BitWidth = VT.getSizeInBits();
9066
9067 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9068 SDValue X = Op.getOperand(0);
9069 SDValue Y = Op.getOperand(1);
9070 SDValue Z = Op.getOperand(2);
9071 EVT AmtVT = Z.getValueType();
9072
9073 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9074 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9075 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9076 // on PowerPC shift by BW being well defined.
9077 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9078 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9079 SDValue SubZ =
9080 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9081 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9082 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9083 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9084}
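// Example (illustrative): fshl i32 X, Y, 40 masks the amount to 8 and yields
// (X << 8) | (Y >> 24). When the masked amount is 0, SubZ is BitWidth and the
// PPC shift by BitWidth produces 0, so the result degenerates to X for fshl
// (or Y for fshr) with no special case -- the well-defined oversized shift
// the comment above relies on.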
9085
9086//===----------------------------------------------------------------------===//
9087// Vector related lowering.
9088//
9089
9090/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9091/// element size of SplatSize. Cast the result to VT.
9092static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9093 SelectionDAG &DAG, const SDLoc &dl) {
9094 static const MVT VTys[] = { // canonical VT to use for each size.
9095 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9096 };
9097
9098 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9099
9100 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9101 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9102 SplatSize = 1;
9103 Val = 0xFF;
9104 }
9105
9106 EVT CanonicalVT = VTys[SplatSize-1];
9107
9108 // Build a canonical splat for this value.
9109 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9110}
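// Example (illustrative): getCanonicalConstSplat(0xFFFF, 2, MVT::v8i16, ...)
// takes the all-ones path and becomes a v16i8 splat of 0xFF (vspltisb -1)
// bitcast to v8i16; the resulting bit pattern is identical.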
9111
9112/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9113/// specified intrinsic ID.
9114 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9115 const SDLoc &dl, EVT DestVT = MVT::Other) {
9116 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9117 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9118 DAG.getConstant(IID, dl, MVT::i32), Op);
9119}
9120
9121/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9122/// specified intrinsic ID.
9123static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9124 SelectionDAG &DAG, const SDLoc &dl,
9125 EVT DestVT = MVT::Other) {
9126 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9127 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9128 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9129}
9130
9131/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9132/// specified intrinsic ID.
9133static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9134 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9135 EVT DestVT = MVT::Other) {
9136 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9137 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9138 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9139}
9140
9141/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9142/// amount. The result has the specified value type.
9143static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9144 SelectionDAG &DAG, const SDLoc &dl) {
9145 // Force LHS/RHS to be the right type.
9146 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9147 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9148
9149 int Ops[16];
9150 for (unsigned i = 0; i != 16; ++i)
9151 Ops[i] = i + Amt;
9152 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9153 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9154}
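// Example (illustrative): BuildVSLDOI(T, T, 4, MVT::v4i32, ...) selects bytes
// 4..19 of T concatenated with itself, i.e. it rotates the four words of T by
// one element position.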
9155
9156/// Do we have an efficient pattern in a .td file for this node?
9157///
9158/// \param V - pointer to the BuildVectorSDNode being matched
9159/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9160///
9161/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9162/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9163/// the opposite is true (expansion is beneficial) are:
9164/// - The node builds a vector out of integers that are not 32 or 64-bits
9165/// - The node builds a vector out of constants
9166/// - The node is a "load-and-splat"
9167/// In all other cases, we will choose to keep the BUILD_VECTOR.
9168 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9169 bool HasDirectMove,
9170 bool HasP8Vector) {
9171 EVT VecVT = V->getValueType(0);
9172 bool RightType = VecVT == MVT::v2f64 ||
9173 (HasP8Vector && VecVT == MVT::v4f32) ||
9174 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9175 if (!RightType)
9176 return false;
9177
9178 bool IsSplat = true;
9179 bool IsLoad = false;
9180 SDValue Op0 = V->getOperand(0);
9181
9182 // This function is called in a block that confirms the node is not a constant
9183 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9184 // different constants.
9185 if (V->isConstant())
9186 return false;
9187 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9188 if (V->getOperand(i).isUndef())
9189 return false;
9190 // We want to expand nodes that represent load-and-splat even if the
9191 // loaded value is a floating point truncation or conversion to int.
9192 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9193 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9194 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9195 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9196 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9197 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9198 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9199 IsLoad = true;
9200 // If the operands are different or the input is not a load and has more
9201 // uses than just this BV node, then it isn't a splat.
9202 if (V->getOperand(i) != Op0 ||
9203 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9204 IsSplat = false;
9205 }
9206 return !(IsSplat && IsLoad);
9207}
9208
9209// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9210SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9211
9212 SDLoc dl(Op);
9213 SDValue Op0 = Op->getOperand(0);
9214
9215 if ((Op.getValueType() != MVT::f128) ||
9216 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9217 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9218 (Op0.getOperand(1).getValueType() != MVT::i64))
9219 return SDValue();
9220
9221 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9222 Op0.getOperand(1));
9223}
9224
9225static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9226 const SDValue *InputLoad = &Op;
9227 while (InputLoad->getOpcode() == ISD::BITCAST)
9228 InputLoad = &InputLoad->getOperand(0);
9229 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9230 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9231 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9232 InputLoad = &InputLoad->getOperand(0);
9233 }
9234 if (InputLoad->getOpcode() != ISD::LOAD)
9235 return nullptr;
9236 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9237 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9238}
9239
9240// Convert the argument APFloat to a single precision APFloat if there is no
9241// loss in information during the conversion to single precision APFloat and the
9242// resulting number is not a denormal number. Return true if successful.
9243 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9244 APFloat APFloatToConvert = ArgAPFloat;
9245 bool LosesInfo = true;
9247 &LosesInfo);
9248 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9249 if (Success)
9250 ArgAPFloat = APFloatToConvert;
9251 return Success;
9252}
9253
9254// Bitcast the argument APInt to a double and convert it to a single precision
9255// APFloat, bitcast the APFloat to an APInt and assign it to the original
9256// argument if there is no loss in information during the conversion from
9257// double to single precision APFloat and the resulting number is not a denormal
9258// number. Return true if successful.
9259 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9260 double DpValue = ArgAPInt.bitsToDouble();
9261 APFloat APFloatDp(DpValue);
9262 bool Success = convertToNonDenormSingle(APFloatDp);
9263 if (Success)
9264 ArgAPInt = APFloatDp.bitcastToAPInt();
9265 return Success;
9266}
9267
9268 // Nondestructive check for convertToNonDenormSingle.
9269 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9270 // Only convert if it loses info, since XXSPLTIDP should
9271 // handle the other case.
9272 APFloat APFloatToConvert = ArgAPFloat;
9273 bool LosesInfo = true;
9275 &LosesInfo);
9276
9277 return (!LosesInfo && !APFloatToConvert.isDenormal());
9278}
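// Examples (illustrative): 1.0 and 256.5 round-trip through single precision
// exactly, so convertToNonDenormSingle succeeds on them; 0.1 loses information
// in binary32 and 0x1p-140 converts to a single-precision denormal, so both
// are rejected and the BUILD_VECTOR lowering falls back to XXSPLTI32DX.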
9279
9280static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9281 unsigned &Opcode) {
9282 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9283 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9284 return false;
9285
9286 EVT Ty = Op->getValueType(0);
9287 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9288 // as we cannot handle extending loads for these types.
9289 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9290 ISD::isNON_EXTLoad(InputNode))
9291 return true;
9292
9293 EVT MemVT = InputNode->getMemoryVT();
9294 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9295 // memory VT is the same vector element VT type.
9296 // The loads feeding into the v8i16 and v16i8 types will be extending because
9297 // scalar i8/i16 are not legal types.
9298 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9299 (MemVT == Ty.getVectorElementType()))
9300 return true;
9301
9302 if (Ty == MVT::v2i64) {
9303 // Check the extend type, when the input type is i32, and the output vector
9304 // type is v2i64.
9305 if (MemVT == MVT::i32) {
9306 if (ISD::isZEXTLoad(InputNode))
9307 Opcode = PPCISD::ZEXT_LD_SPLAT;
9308 if (ISD::isSEXTLoad(InputNode))
9309 Opcode = PPCISD::SEXT_LD_SPLAT;
9310 }
9311 return true;
9312 }
9313 return false;
9314}
9315
9316// If this is a case we can't handle, return null and let the default
9317// expansion code take care of it. If we CAN select this case, and if it
9318// selects to a single instruction, return Op. Otherwise, if we can codegen
9319// this case more efficiently than a constant pool load, lower it to the
9320// sequence of ops that should be used.
9321SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9322 SelectionDAG &DAG) const {
9323 SDLoc dl(Op);
9324 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9325 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9326
9327 // Check if this is a splat of a constant value.
9328 APInt APSplatBits, APSplatUndef;
9329 unsigned SplatBitSize;
9330 bool HasAnyUndefs;
9331 bool BVNIsConstantSplat =
9332 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9333 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9334
9335 // If it is a splat of a double, check if we can shrink it to a 32 bit
9336 // non-denormal float which when converted back to double gives us the same
9337 // double. This is to exploit the XXSPLTIDP instruction.
9338 // If we lose precision, we use XXSPLTI32DX.
9339 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9340 Subtarget.hasPrefixInstrs()) {
9341 // Check the type first to short-circuit so we don't modify APSplatBits if
9342 // this block isn't executed.
9343 if ((Op->getValueType(0) == MVT::v2f64) &&
9344 convertToNonDenormSingle(APSplatBits)) {
9345 SDValue SplatNode = DAG.getNode(
9346 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9347 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9348 return DAG.getBitcast(Op.getValueType(), SplatNode);
9349 } else {
9350 // We may lose precision, so we have to use XXSPLTI32DX.
9351
9352 uint32_t Hi =
9353 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9354 uint32_t Lo =
9355 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9356 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9357
9358 if (!Hi || !Lo)
9359 // If either half of the constant is 0, generate XXLXOR to set the register to 0.
9360 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9361
9362 if (Hi)
9363 SplatNode = DAG.getNode(
9364 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9365 DAG.getTargetConstant(0, dl, MVT::i32),
9366 DAG.getTargetConstant(Hi, dl, MVT::i32));
9367
9368 if (Lo)
9369 SplatNode =
9370 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9371 DAG.getTargetConstant(1, dl, MVT::i32),
9372 DAG.getTargetConstant(Lo, dl, MVT::i32));
9373
9374 return DAG.getBitcast(Op.getValueType(), SplatNode);
9375 }
9376 }
9377
9378 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9379 unsigned NewOpcode = PPCISD::LD_SPLAT;
9380
9381 // Handle load-and-splat patterns as we have instructions that will do this
9382 // in one go.
9383 if (DAG.isSplatValue(Op, true) &&
9384 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9385 const SDValue *InputLoad = &Op.getOperand(0);
9386 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9387
9388 // If the input load is an extending load, it will be an i32 -> i64
9389 // extending load and isValidSplatLoad() will update NewOpcode.
9390 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9391 unsigned ElementSize =
9392 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9393
9394 assert(((ElementSize == 2 * MemorySize)
9395 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9396 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9397 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9398 "Unmatched element size and opcode!\n");
9399
9400 // Checking for a single use of this load, we have to check for vector
9401 // width (128 bits) / ElementSize uses (since each operand of the
9402 // BUILD_VECTOR is a separate use of the value).
9403 unsigned NumUsesOfInputLD = 128 / ElementSize;
9404 for (SDValue BVInOp : Op->ops())
9405 if (BVInOp.isUndef())
9406 NumUsesOfInputLD--;
9407
9408 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9409 // the cases below should also apply to "lfiwzx/lfiwax + LE target + index
9410 // 1", "lxvrhx + BE target + index 7", and "lxvrbx + BE target + index
9411 // 15", but isValidSplatLoad() only returns true when the data at index
9412 // 0 is valid, so we will not get into trouble for
9413 // these cases.
9414 //
9415 // case 1 - lfiwzx/lfiwax
9416 // 1.1: load result is i32 and is sign/zero extend to i64;
9417 // 1.2: build a v2i64 vector type with above loaded value;
9418 // 1.3: the vector has only one value at index 0, others are all undef;
9419 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9420 if (NumUsesOfInputLD == 1 &&
9421 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9422 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9423 Subtarget.hasLFIWAX()))
9424 return SDValue();
9425
9426 // case 2 - lxvr[hb]x
9427 // 2.1: load result is at most i16;
9428 // 2.2: build a vector with above loaded value;
9429 // 2.3: the vector has only one value at index 0, others are all undef;
9430 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9431 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9432 Subtarget.isISA3_1() && ElementSize <= 16)
9433 return SDValue();
9434
9435 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9436 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9437 Subtarget.hasVSX()) {
9438 SDValue Ops[] = {
9439 LD->getChain(), // Chain
9440 LD->getBasePtr(), // Ptr
9441 DAG.getValueType(Op.getValueType()) // VT
9442 };
9443 SDValue LdSplt = DAG.getMemIntrinsicNode(
9444 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9445 LD->getMemoryVT(), LD->getMemOperand());
9446 // Replace all uses of the output chain of the original load with the
9447 // output chain of the new load.
9448 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9449 LdSplt.getValue(1));
9450 return LdSplt;
9451 }
9452 }
9453
9454 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9455 // 32-bits can be lowered to VSX instructions under certain conditions.
9456 // Without VSX, there is no pattern more efficient than expanding the node.
9457 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9458 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9459 Subtarget.hasP8Vector()))
9460 return Op;
9461 return SDValue();
9462 }
9463
9464 uint64_t SplatBits = APSplatBits.getZExtValue();
9465 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9466 unsigned SplatSize = SplatBitSize / 8;
9467
9468 // First, handle single instruction cases.
9469
9470 // All zeros?
9471 if (SplatBits == 0) {
9472 // Canonicalize all zero vectors to be v4i32.
9473 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9474 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9475 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9476 }
9477 return Op;
9478 }
9479
9480 // We have XXSPLTIW for constant splats four bytes wide.
9481 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9482 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9483 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9484 // turned into a 4-byte splat of 0xABABABAB.
9485 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9486 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9487 Op.getValueType(), DAG, dl);
9488
9489 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9490 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9491 dl);
9492
9493 // We have XXSPLTIB for constant splats one byte wide.
9494 if (Subtarget.hasP9Vector() && SplatSize == 1)
9495 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9496 dl);
9497
9498 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9499 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9500 (32-SplatBitSize));
9501 if (SextVal >= -16 && SextVal <= 15)
9502 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9503 dl);
9504
9505 // Two instruction sequences.
9506
9507 // If this value is in the range [-32,30] and is even, use:
9508 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9509 // If this value is in the range [17,31] and is odd, use:
9510 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9511 // If this value is in the range [-31,-17] and is odd, use:
9512 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9513 // Note the last two are three-instruction sequences.
9514 if (SextVal >= -32 && SextVal <= 31) {
9515 // To avoid having these optimizations undone by constant folding,
9516 // we convert to a pseudo that will be expanded later into one of
9517 // the above forms.
9518 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9519 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9520 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9521 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9522 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9523 if (VT == Op.getValueType())
9524 return RetVal;
9525 else
9526 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9527 }
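// Worked example (illustrative): SextVal = 30 later expands to
//   vsplti(15) + vsplti(15)   -> 15 + 15 = 30
// while SextVal = 27 (odd, in [17,31]) expands to
//   vsplti(11) - vsplti(-16)  -> 11 + 16 = 27.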
9528
9529 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9530 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9531 // for fneg/fabs.
9532 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9533 // Make -1 and vspltisw -1:
9534 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9535
9536 // Make the VSLW intrinsic, computing 0x8000_0000.
9537 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9538 OnesV, DAG, dl);
9539
9540 // xor by OnesV to invert it.
9541 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9542 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9543 }
9544
9545 // Check to see if this is a wide variety of vsplti*, binop self cases.
9546 static const signed char SplatCsts[] = {
9547 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9548 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9549 };
9550
9551 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9552 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9553 // cases which are ambiguous (e.g. formation of 0x8000_0000); 'vsplti -1' is listed first for this reason.
9554 int i = SplatCsts[idx];
9555
9556 // Figure out what shift amount will be used by altivec if shifted by i in
9557 // this splat size.
9558 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9559
9560 // vsplti + shl self.
9561 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9562 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9563 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9564 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9565 Intrinsic::ppc_altivec_vslw
9566 };
9567 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9568 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9569 }
9570
9571 // vsplti + srl self.
9572 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9573 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9574 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9575 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9576 Intrinsic::ppc_altivec_vsrw
9577 };
9578 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9579 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9580 }
9581
9582 // vsplti + rol self.
9583 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9584 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9585 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9586 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9587 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9588 Intrinsic::ppc_altivec_vrlw
9589 };
9590 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9591 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9592 }
9593
9594 // t = vsplti c, result = vsldoi t, t, 1
9595 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9596 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9597 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9598 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9599 }
9600 // t = vsplti c, result = vsldoi t, t, 2
9601 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9602 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9603 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9604 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9605 }
9606 // t = vsplti c, result = vsldoi t, t, 3
9607 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9608 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9609 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9610 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9611 }
9612 }
9613
9614 return SDValue();
9615}
9616
9617/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9618/// the specified operations to build the shuffle.
9619static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9620 SDValue RHS, SelectionDAG &DAG,
9621 const SDLoc &dl) {
9622 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9623 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9624 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
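  // PFEntry packs: cost in bits 31-30, opcode in bits 29-26, and two 13-bit
  // operand IDs, each encoding four word indices (0-7, or 8 for undef) in
  // base 9; e.g. <0,1,2,3> encodes as ((0*9+1)*9+2)*9+3 == 102, which is the
  // OP_COPY LHS check below.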
9625
9626 enum {
9627 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9628 OP_VMRGHW,
9629 OP_VMRGLW,
9630 OP_VSPLTISW0,
9631 OP_VSPLTISW1,
9632 OP_VSPLTISW2,
9633 OP_VSPLTISW3,
9634 OP_VSLDOI4,
9635 OP_VSLDOI8,
9636 OP_VSLDOI12
9637 };
9638
9639 if (OpNum == OP_COPY) {
9640 if (LHSID == (1*9+2)*9+3) return LHS;
9641 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9642 return RHS;
9643 }
9644
9645 SDValue OpLHS, OpRHS;
9646 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9647 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9648
9649 int ShufIdxs[16];
9650 switch (OpNum) {
9651 default: llvm_unreachable("Unknown i32 permute!");
9652 case OP_VMRGHW:
9653 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9654 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9655 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9656 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9657 break;
9658 case OP_VMRGLW:
9659 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9660 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9661 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9662 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9663 break;
9664 case OP_VSPLTISW0:
9665 for (unsigned i = 0; i != 16; ++i)
9666 ShufIdxs[i] = (i&3)+0;
9667 break;
9668 case OP_VSPLTISW1:
9669 for (unsigned i = 0; i != 16; ++i)
9670 ShufIdxs[i] = (i&3)+4;
9671 break;
9672 case OP_VSPLTISW2:
9673 for (unsigned i = 0; i != 16; ++i)
9674 ShufIdxs[i] = (i&3)+8;
9675 break;
9676 case OP_VSPLTISW3:
9677 for (unsigned i = 0; i != 16; ++i)
9678 ShufIdxs[i] = (i&3)+12;
9679 break;
9680 case OP_VSLDOI4:
9681 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9682 case OP_VSLDOI8:
9683 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9684 case OP_VSLDOI12:
9685 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9686 }
9687 EVT VT = OpLHS.getValueType();
9688 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9689 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9690 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9691 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9692}
9693
9694/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9695/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9696/// SDValue.
9697SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9698 SelectionDAG &DAG) const {
9699 const unsigned BytesInVector = 16;
9700 bool IsLE = Subtarget.isLittleEndian();
9701 SDLoc dl(N);
9702 SDValue V1 = N->getOperand(0);
9703 SDValue V2 = N->getOperand(1);
9704 unsigned ShiftElts = 0, InsertAtByte = 0;
9705 bool Swap = false;
9706
9707 // Shifts required to get the byte we want at element 7.
9708 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9709 0, 15, 14, 13, 12, 11, 10, 9};
9710 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9711 1, 2, 3, 4, 5, 6, 7, 8};
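  // Each table maps a source byte position to the VECSHL rotate amount that
  // brings that byte into the lane VINSERTB reads from; e.g. on little
  // endian, a byte at mask position 9 needs LittleEndianShifts[9] == 15.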
9712
9713 ArrayRef<int> Mask = N->getMask();
9714 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9715
9716 // For each mask element, find out if we're just inserting something
9717 // from V2 into V1 or vice versa.
9718 // Possible permutations inserting an element from V2 into V1:
9719 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9720 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9721 // ...
9722 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9723 // Inserting from V1 into V2 will be similar, except mask range will be
9724 // [16,31].
9725
9726 bool FoundCandidate = false;
9727 // If both vector operands for the shuffle are the same vector, the mask
9728 // will contain only elements from the first one and the second one will be
9729 // undef.
9730 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9731   // Go through the mask of bytes to find an element that's being moved
9732   // from one vector to the other.
9733 for (unsigned i = 0; i < BytesInVector; ++i) {
9734 unsigned CurrentElement = Mask[i];
9735     // If the 2nd operand is undefined, we should only look for the VINSERTB
9736     // source element (7 on big endian, 8 on little endian) in the Mask.
9737 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9738 continue;
9739
9740 bool OtherElementsInOrder = true;
9741 // Examine the other elements in the Mask to see if they're in original
9742 // order.
9743 for (unsigned j = 0; j < BytesInVector; ++j) {
9744 if (j == i)
9745 continue;
9746       // If CurrentElement is from V1 [0,15], we expect the rest of the Mask
9747       // to be from V2 [16,31], and vice versa -- unless the 2nd operand is
9748       // undefined, in which case we always pick from the 1st operand.
9749 int MaskOffset =
9750 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9751 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9752 OtherElementsInOrder = false;
9753 break;
9754 }
9755 }
9756 // If other elements are in original order, we record the number of shifts
9757 // we need to get the element we want into element 7. Also record which byte
9758 // in the vector we should insert into.
9759 if (OtherElementsInOrder) {
9760 // If 2nd operand is undefined, we assume no shifts and no swapping.
9761 if (V2.isUndef()) {
9762 ShiftElts = 0;
9763 Swap = false;
9764 } else {
9765         // Only the low 4 bits matter for the shift; the operands are swapped if CurrentElement is >= 2^4.
9766 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9767 : BigEndianShifts[CurrentElement & 0xF];
9768 Swap = CurrentElement < BytesInVector;
9769 }
9770 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9771 FoundCandidate = true;
9772 break;
9773 }
9774 }
9775
9776 if (!FoundCandidate)
9777 return SDValue();
9778
9779 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9780 // optionally with VECSHL if shift is required.
9781 if (Swap)
9782 std::swap(V1, V2);
9783 if (V2.isUndef())
9784 V2 = V1;
9785 if (ShiftElts) {
9786 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9787 DAG.getConstant(ShiftElts, dl, MVT::i32));
9788 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9789 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9790 }
9791 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9792 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9793}
9794
9795/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9796/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9797/// SDValue.
9798SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9799 SelectionDAG &DAG) const {
9800 const unsigned NumHalfWords = 8;
9801 const unsigned BytesInVector = NumHalfWords * 2;
9802 // Check that the shuffle is on half-words.
9803 if (!isNByteElemShuffleMask(N, 2, 1))
9804 return SDValue();
9805
9806 bool IsLE = Subtarget.isLittleEndian();
9807 SDLoc dl(N);
9808 SDValue V1 = N->getOperand(0);
9809 SDValue V2 = N->getOperand(1);
9810 unsigned ShiftElts = 0, InsertAtByte = 0;
9811 bool Swap = false;
9812
9813 // Shifts required to get the half-word we want at element 3.
9814 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9815 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9816
9817 uint32_t Mask = 0;
9818 uint32_t OriginalOrderLow = 0x1234567;
9819 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9820   // Now we look at mask elements 0,2,4,6,8,10,12,14 and pack the mask into
9821   // a 32-bit value, using one 4-bit nibble per element.
9822 for (unsigned i = 0; i < NumHalfWords; ++i) {
9823 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9824 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9825 }
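  // E.g. the identity shuffle (byte mask elements 0,2,4,...,14 at the even
  // positions, each divided by 2) packs to 0x01234567, which is exactly
  // OriginalOrderLow.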
9826
9827 // For each mask element, find out if we're just inserting something
9828 // from V2 into V1 or vice versa. Possible permutations inserting an element
9829 // from V2 into V1:
9830 // X, 1, 2, 3, 4, 5, 6, 7
9831 // 0, X, 2, 3, 4, 5, 6, 7
9832 // 0, 1, X, 3, 4, 5, 6, 7
9833 // 0, 1, 2, X, 4, 5, 6, 7
9834 // 0, 1, 2, 3, X, 5, 6, 7
9835 // 0, 1, 2, 3, 4, X, 6, 7
9836 // 0, 1, 2, 3, 4, 5, X, 7
9837 // 0, 1, 2, 3, 4, 5, 6, X
9838 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9839
9840 bool FoundCandidate = false;
9841 // Go through the mask of half-words to find an element that's being moved
9842 // from one vector to the other.
9843 for (unsigned i = 0; i < NumHalfWords; ++i) {
9844 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9845 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9846 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9847 uint32_t TargetOrder = 0x0;
9848
9849 // If both vector operands for the shuffle are the same vector, the mask
9850 // will contain only elements from the first one and the second one will be
9851 // undef.
9852 if (V2.isUndef()) {
9853 ShiftElts = 0;
9854 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9855 TargetOrder = OriginalOrderLow;
9856 Swap = false;
9857       // Skip if this is not the correct element, or if the mask of the other
9858       // elements doesn't match our expected order.
9859 if (MaskOneElt == VINSERTHSrcElem &&
9860 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9861 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9862 FoundCandidate = true;
9863 break;
9864 }
9865 } else { // If both operands are defined.
9866 // Target order is [8,15] if the current mask is between [0,7].
9867 TargetOrder =
9868 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9869       // Skip if the mask of the other elements doesn't match our expected order.
9870 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9871 // We only need the last 3 bits for the number of shifts.
9872 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9873 : BigEndianShifts[MaskOneElt & 0x7];
9874 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9875 Swap = MaskOneElt < NumHalfWords;
9876 FoundCandidate = true;
9877 break;
9878 }
9879 }
9880 }
9881
9882 if (!FoundCandidate)
9883 return SDValue();
9884
9885 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9886 // optionally with VECSHL if shift is required.
9887 if (Swap)
9888 std::swap(V1, V2);
9889 if (V2.isUndef())
9890 V2 = V1;
9891 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9892 if (ShiftElts) {
9893 // Double ShiftElts because we're left shifting on v16i8 type.
9894 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9895 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9896 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9897 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9898 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9899 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9900 }
9901 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9902 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9903 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9904 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9905}
9906
9907/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9908/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9909/// return the default SDValue.
9910SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9911 SelectionDAG &DAG) const {
9912 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9913 // to v16i8. Peek through the bitcasts to get the actual operands.
9914   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9915   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9916
9917 auto ShuffleMask = SVN->getMask();
9918 SDValue VecShuffle(SVN, 0);
9919 SDLoc DL(SVN);
9920
9921 // Check that we have a four byte shuffle.
9922 if (!isNByteElemShuffleMask(SVN, 4, 1))
9923 return SDValue();
9924
9925 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9926 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9927     std::swap(LHS, RHS);
9928     VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9929 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
9930 if (!CommutedSV)
9931 return SDValue();
9932 ShuffleMask = CommutedSV->getMask();
9933 }
9934
9935 // Ensure that the RHS is a vector of constants.
9936 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9937 if (!BVN)
9938 return SDValue();
9939
9940 // Check if RHS is a splat of 4-bytes (or smaller).
9941 APInt APSplatValue, APSplatUndef;
9942 unsigned SplatBitSize;
9943 bool HasAnyUndefs;
9944 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9945 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9946 SplatBitSize > 32)
9947 return SDValue();
9948
9949 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9950 // The instruction splats a constant C into two words of the source vector
9951 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9952 // Thus we check that the shuffle mask is the equivalent of
9953 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9954 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9955 // within each word are consecutive, so we only need to check the first byte.
9956 SDValue Index;
9957 bool IsLE = Subtarget.isLittleEndian();
9958 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9959 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9960 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9961 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9962 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9963 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9964 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9965 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9966 else
9967 return SDValue();
9968
9969 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9970 // for XXSPLTI32DX.
9971 unsigned SplatVal = APSplatValue.getZExtValue();
9972 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9973 SplatVal |= (SplatVal << SplatBitSize);
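  // E.g. an 8-bit splat of 0xAB widens as 0xAB -> 0xABAB -> 0xABABABAB
  // before being used as the 32-bit immediate of xxsplti32dx.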
9974
9975 SDValue SplatNode = DAG.getNode(
9976 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9977 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9978 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9979}
9980
9981/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9982/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9983 /// a multiple of 8. Otherwise convert it to a scalar rotation (i128),
9984 /// i.e. (or (shl x, C1), (srl x, 128-C1)).
9985SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9986 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9987 assert(Op.getValueType() == MVT::v1i128 &&
9988 "Only set v1i128 as custom, other type shouldn't reach here!");
9989 SDLoc dl(Op);
9990 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9991 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9992 unsigned SHLAmt = N1.getConstantOperandVal(0);
9993 if (SHLAmt % 8 == 0) {
9994 std::array<int, 16> Mask;
9995 std::iota(Mask.begin(), Mask.end(), 0);
9996 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
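    // E.g. SHLAmt == 16 produces Mask = <2,3,...,15,0,1>: a rotate by two
    // bytes expressed as a byte shuffle.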
9997 if (SDValue Shuffle =
9998 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9999 DAG.getUNDEF(MVT::v16i8), Mask))
10000 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10001 }
10002 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10003 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10004 DAG.getConstant(SHLAmt, dl, MVT::i32));
10005 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10006 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10007 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10008 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10009}
10010
10011/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10012/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10013/// return the code it can be lowered into. Worst case, it can always be
10014/// lowered into a vperm.
10015SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10016 SelectionDAG &DAG) const {
10017 SDLoc dl(Op);
10018 SDValue V1 = Op.getOperand(0);
10019 SDValue V2 = Op.getOperand(1);
10020 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10021
10022 // Any nodes that were combined in the target-independent combiner prior
10023 // to vector legalization will not be sent to the target combine. Try to
10024 // combine it here.
10025 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10026 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10027 return NewShuffle;
10028 Op = NewShuffle;
10029 SVOp = cast<ShuffleVectorSDNode>(Op);
10030 V1 = Op.getOperand(0);
10031 V2 = Op.getOperand(1);
10032 }
10033 EVT VT = Op.getValueType();
10034 bool isLittleEndian = Subtarget.isLittleEndian();
10035
10036 unsigned ShiftElts, InsertAtByte;
10037 bool Swap = false;
10038
10039 // If this is a load-and-splat, we can do that with a single instruction
10040 // in some cases. However if the load has multiple uses, we don't want to
10041 // combine it because that will just produce multiple loads.
10042 bool IsPermutedLoad = false;
10043 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10044 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10045 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10046 InputLoad->hasOneUse()) {
10047 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10048 int SplatIdx =
10049 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10050
10051 // The splat index for permuted loads will be in the left half of the vector
10052 // which is strictly wider than the loaded value by 8 bytes. So we need to
10053 // adjust the splat index to point to the correct address in memory.
10054 if (IsPermutedLoad) {
10055 assert((isLittleEndian || IsFourByte) &&
10056 "Unexpected size for permuted load on big endian target");
10057 SplatIdx += IsFourByte ? 2 : 1;
10058 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10059 "Splat of a value outside of the loaded memory");
10060 }
10061
10062 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10063 // For 4-byte load-and-splat, we need Power9.
10064 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10065 uint64_t Offset = 0;
10066 if (IsFourByte)
10067 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10068 else
10069 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
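      // E.g. splatting word 1 of a little-endian v4i32 load reads from
      // BasePtr + (3 - 1) * 4 == BasePtr + 8.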
10070
10071 // If the width of the load is the same as the width of the splat,
10072 // loading with an offset would load the wrong memory.
10073 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10074 Offset = 0;
10075
10076 SDValue BasePtr = LD->getBasePtr();
10077       if (Offset != 0)
10078         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10079 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10080 SDValue Ops[] = {
10081 LD->getChain(), // Chain
10082 BasePtr, // BasePtr
10083 DAG.getValueType(Op.getValueType()) // VT
10084 };
10085 SDVTList VTL =
10086 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10087       SDValue LdSplt =
10088           DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10089 Ops, LD->getMemoryVT(), LD->getMemOperand());
10090 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10091 if (LdSplt.getValueType() != SVOp->getValueType(0))
10092 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10093 return LdSplt;
10094 }
10095 }
10096
10097 // All v2i64 and v2f64 shuffles are legal
10098 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10099 return Op;
10100
10101 if (Subtarget.hasP9Vector() &&
10102 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10103 isLittleEndian)) {
10104 if (Swap)
10105 std::swap(V1, V2);
10106 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10107 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10108 if (ShiftElts) {
10109 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10110 DAG.getConstant(ShiftElts, dl, MVT::i32));
10111 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10112 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10113 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10114 }
10115 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10116 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10117 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10118 }
10119
10120 if (Subtarget.hasPrefixInstrs()) {
10121 SDValue SplatInsertNode;
10122 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10123 return SplatInsertNode;
10124 }
10125
10126 if (Subtarget.hasP9Altivec()) {
10127 SDValue NewISDNode;
10128 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10129 return NewISDNode;
10130
10131 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10132 return NewISDNode;
10133 }
10134
10135 if (Subtarget.hasVSX() &&
10136 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10137 if (Swap)
10138 std::swap(V1, V2);
10139 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10140 SDValue Conv2 =
10141 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10142
10143 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10144 DAG.getConstant(ShiftElts, dl, MVT::i32));
10145 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10146 }
10147
10148 if (Subtarget.hasVSX() &&
10149 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10150 if (Swap)
10151 std::swap(V1, V2);
10152 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10153 SDValue Conv2 =
10154 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10155
10156 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10157 DAG.getConstant(ShiftElts, dl, MVT::i32));
10158 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10159 }
10160
10161 if (Subtarget.hasP9Vector()) {
10162 if (PPC::isXXBRHShuffleMask(SVOp)) {
10163 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10164 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10165 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10166 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10167 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10168 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10169 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10170 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10171 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10172 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10173 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10174 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10175 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10176 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10177 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10178 }
10179 }
10180
10181 if (Subtarget.hasVSX()) {
10182 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10183 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10184
10185 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10186 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10187 DAG.getConstant(SplatIdx, dl, MVT::i32));
10188 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10189 }
10190
10191 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10192 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10193 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10194 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10195 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10196 }
10197 }
10198
10199 // Cases that are handled by instructions that take permute immediates
10200 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10201 // selected by the instruction selector.
10202 if (V2.isUndef()) {
10203 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10204 PPC::isSplatShuffleMask(SVOp, 2) ||
10205 PPC::isSplatShuffleMask(SVOp, 4) ||
10206 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10207 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10208 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10209 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10210 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10211 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10212 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10213 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10214 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10215 (Subtarget.hasP8Altivec() && (
10216 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10217 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10218 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10219 return Op;
10220 }
10221 }
10222
10223 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10224 // and produce a fixed permutation. If any of these match, do not lower to
10225 // VPERM.
10226 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10227 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10228 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10229 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10230 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10231 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10232 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10233 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10234 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10235 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10236 (Subtarget.hasP8Altivec() && (
10237 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10238 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10239 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10240 return Op;
10241
10242 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10243 // perfect shuffle table to emit an optimal matching sequence.
10244 ArrayRef<int> PermMask = SVOp->getMask();
10245
10246 if (!DisablePerfectShuffle && !isLittleEndian) {
10247 unsigned PFIndexes[4];
10248 bool isFourElementShuffle = true;
10249 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10250 ++i) { // Element number
10251 unsigned EltNo = 8; // Start out undef.
10252 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10253 if (PermMask[i * 4 + j] < 0)
10254 continue; // Undef, ignore it.
10255
10256 unsigned ByteSource = PermMask[i * 4 + j];
10257 if ((ByteSource & 3) != j) {
10258 isFourElementShuffle = false;
10259 break;
10260 }
10261
10262 if (EltNo == 8) {
10263 EltNo = ByteSource / 4;
10264 } else if (EltNo != ByteSource / 4) {
10265 isFourElementShuffle = false;
10266 break;
10267 }
10268 }
10269 PFIndexes[i] = EltNo;
10270 }
10271
10272 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10273 // perfect shuffle vector to determine if it is cost effective to do this as
10274 // discrete instructions, or whether we should use a vperm.
10275 // For now, we skip this for little endian until such time as we have a
10276 // little-endian perfect shuffle table.
10277 if (isFourElementShuffle) {
10278 // Compute the index in the perfect shuffle table.
10279 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10280 PFIndexes[2] * 9 + PFIndexes[3];
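      // E.g. the big-endian vmrghw pattern gives PFIndexes = {0,4,1,5} and a
      // table index of 0*729 + 4*81 + 1*9 + 5 == 338.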
10281
10282 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10283 unsigned Cost = (PFEntry >> 30);
10284
10285 // Determining when to avoid vperm is tricky. Many things affect the cost
10286 // of vperm, particularly how many times the perm mask needs to be
10287 // computed. For example, if the perm mask can be hoisted out of a loop or
10288 // is already used (perhaps because there are multiple permutes with the
10289 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10290 // permute mask out of the loop requires an extra register.
10291 //
10292 // As a compromise, we only emit discrete instructions if the shuffle can
10293 // be generated in 3 or fewer operations. When we have loop information
10294 // available, if this block is within a loop, we should avoid using vperm
10295 // for 3-operation perms and use a constant pool load instead.
10296 if (Cost < 3)
10297 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10298 }
10299 }
10300
10301 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10302 // vector that will get spilled to the constant pool.
10303 if (V2.isUndef()) V2 = V1;
10304
10305 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10306}
10307
10308SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10309 ArrayRef<int> PermMask, EVT VT,
10310 SDValue V1, SDValue V2) const {
10311 unsigned Opcode = PPCISD::VPERM;
10312 EVT ValType = V1.getValueType();
10313 SDLoc dl(Op);
10314 bool NeedSwap = false;
10315 bool isLittleEndian = Subtarget.isLittleEndian();
10316 bool isPPC64 = Subtarget.isPPC64();
10317
10318 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10319 (V1->hasOneUse() || V2->hasOneUse())) {
10320     LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - "
10321                          "using XXPERM instead\n");
10322 Opcode = PPCISD::XXPERM;
10323
10324 // The second input to XXPERM is also an output so if the second input has
10325 // multiple uses then copying is necessary, as a result we want the
10326 // single-use operand to be used as the second input to prevent copying.
10327 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10328 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10329 std::swap(V1, V2);
10330 NeedSwap = !NeedSwap;
10331 }
10332 }
10333
10334 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10335 // that it is in input element units, not in bytes. Convert now.
10336
10337 // For little endian, the order of the input vectors is reversed, and
10338 // the permutation mask is complemented with respect to 31. This is
10339 // necessary to produce proper semantics with the big-endian-based vperm
10340 // instruction.
10341 EVT EltVT = V1.getValueType().getVectorElementType();
10342 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10343
10344 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10345 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10346
10347 /*
10348     Vectors will be appended like so: [ V1 | V2 ]
10349 XXSWAPD on V1:
10350 [ A | B | C | D ] -> [ C | D | A | B ]
10351 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10352 i.e. index of A, B += 8, and index of C, D -= 8.
10353 XXSWAPD on V2:
10354 [ E | F | G | H ] -> [ G | H | E | F ]
10355 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10356 i.e. index of E, F += 8, index of G, H -= 8
10357 Swap V1 and V2:
10358 [ V1 | V2 ] -> [ V2 | V1 ]
10359 0-15 16-31 0-15 16-31
10360 i.e. index of V1 += 16, index of V2 -= 16
10361 */
10362
10363 SmallVector<SDValue, 16> ResultMask;
10364 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10365 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10366
10367 if (V1HasXXSWAPD) {
10368 if (SrcElt < 8)
10369 SrcElt += 8;
10370 else if (SrcElt < 16)
10371 SrcElt -= 8;
10372 }
10373 if (V2HasXXSWAPD) {
10374 if (SrcElt > 23)
10375 SrcElt -= 8;
10376 else if (SrcElt > 15)
10377 SrcElt += 8;
10378 }
10379 if (NeedSwap) {
10380 if (SrcElt < 16)
10381 SrcElt += 16;
10382 else
10383 SrcElt -= 16;
10384 }
10385 for (unsigned j = 0; j != BytesPerElement; ++j)
10386 if (isLittleEndian)
10387 ResultMask.push_back(
10388 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10389 else
10390 ResultMask.push_back(
10391 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10392 }
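  // E.g. for v16i8 on little endian, source element 3 becomes permute-control
  // byte 31 - 3 == 28, matching vperm's big-endian-based numbering once the
  // two inputs are swapped below.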
10393
10394 if (V1HasXXSWAPD) {
10395 dl = SDLoc(V1->getOperand(0));
10396 V1 = V1->getOperand(0)->getOperand(1);
10397 }
10398 if (V2HasXXSWAPD) {
10399 dl = SDLoc(V2->getOperand(0));
10400 V2 = V2->getOperand(0)->getOperand(1);
10401 }
10402
10403 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10404 if (ValType != MVT::v2f64)
10405 V1 = DAG.getBitcast(MVT::v2f64, V1);
10406 if (V2.getValueType() != MVT::v2f64)
10407 V2 = DAG.getBitcast(MVT::v2f64, V2);
10408 }
10409
10410 ShufflesHandledWithVPERM++;
10411 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10412 LLVM_DEBUG({
10413 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10414 if (Opcode == PPCISD::XXPERM) {
10415 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10416 } else {
10417 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10418 }
10419 SVOp->dump();
10420 dbgs() << "With the following permute control vector:\n";
10421 VPermMask.dump();
10422 });
10423
10424 if (Opcode == PPCISD::XXPERM)
10425 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10426
10427   // On little endian we only need to swap the two inputs here;
10428   // the permute mask above was already computed to account for it.
10429 if (isLittleEndian)
10430 std::swap(V1, V2);
10431
10432 SDValue VPERMNode =
10433 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10434
10435 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10436 return VPERMNode;
10437}
10438
10439/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10440 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10441/// information about the intrinsic.
10442static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10443 bool &isDot, const PPCSubtarget &Subtarget) {
10444 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10445 CompareOpc = -1;
10446 isDot = false;
10447 switch (IntrinsicID) {
10448 default:
10449 return false;
10450 // Comparison predicates.
10451 case Intrinsic::ppc_altivec_vcmpbfp_p:
10452 CompareOpc = 966;
10453 isDot = true;
10454 break;
10455 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10456 CompareOpc = 198;
10457 isDot = true;
10458 break;
10459 case Intrinsic::ppc_altivec_vcmpequb_p:
10460 CompareOpc = 6;
10461 isDot = true;
10462 break;
10463 case Intrinsic::ppc_altivec_vcmpequh_p:
10464 CompareOpc = 70;
10465 isDot = true;
10466 break;
10467 case Intrinsic::ppc_altivec_vcmpequw_p:
10468 CompareOpc = 134;
10469 isDot = true;
10470 break;
10471 case Intrinsic::ppc_altivec_vcmpequd_p:
10472 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10473 CompareOpc = 199;
10474 isDot = true;
10475 } else
10476 return false;
10477 break;
10478 case Intrinsic::ppc_altivec_vcmpneb_p:
10479 case Intrinsic::ppc_altivec_vcmpneh_p:
10480 case Intrinsic::ppc_altivec_vcmpnew_p:
10481 case Intrinsic::ppc_altivec_vcmpnezb_p:
10482 case Intrinsic::ppc_altivec_vcmpnezh_p:
10483 case Intrinsic::ppc_altivec_vcmpnezw_p:
10484 if (Subtarget.hasP9Altivec()) {
10485 switch (IntrinsicID) {
10486 default:
10487 llvm_unreachable("Unknown comparison intrinsic.");
10488 case Intrinsic::ppc_altivec_vcmpneb_p:
10489 CompareOpc = 7;
10490 break;
10491 case Intrinsic::ppc_altivec_vcmpneh_p:
10492 CompareOpc = 71;
10493 break;
10494 case Intrinsic::ppc_altivec_vcmpnew_p:
10495 CompareOpc = 135;
10496 break;
10497 case Intrinsic::ppc_altivec_vcmpnezb_p:
10498 CompareOpc = 263;
10499 break;
10500 case Intrinsic::ppc_altivec_vcmpnezh_p:
10501 CompareOpc = 327;
10502 break;
10503 case Intrinsic::ppc_altivec_vcmpnezw_p:
10504 CompareOpc = 391;
10505 break;
10506 }
10507 isDot = true;
10508 } else
10509 return false;
10510 break;
10511 case Intrinsic::ppc_altivec_vcmpgefp_p:
10512 CompareOpc = 454;
10513 isDot = true;
10514 break;
10515 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10516 CompareOpc = 710;
10517 isDot = true;
10518 break;
10519 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10520 CompareOpc = 774;
10521 isDot = true;
10522 break;
10523 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10524 CompareOpc = 838;
10525 isDot = true;
10526 break;
10527 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10528 CompareOpc = 902;
10529 isDot = true;
10530 break;
10531 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10532 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10533 CompareOpc = 967;
10534 isDot = true;
10535 } else
10536 return false;
10537 break;
10538 case Intrinsic::ppc_altivec_vcmpgtub_p:
10539 CompareOpc = 518;
10540 isDot = true;
10541 break;
10542 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10543 CompareOpc = 582;
10544 isDot = true;
10545 break;
10546 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10547 CompareOpc = 646;
10548 isDot = true;
10549 break;
10550 case Intrinsic::ppc_altivec_vcmpgtud_p:
10551 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10552 CompareOpc = 711;
10553 isDot = true;
10554 } else
10555 return false;
10556 break;
10557
10558 case Intrinsic::ppc_altivec_vcmpequq:
10559 case Intrinsic::ppc_altivec_vcmpgtsq:
10560 case Intrinsic::ppc_altivec_vcmpgtuq:
10561 if (!Subtarget.isISA3_1())
10562 return false;
10563 switch (IntrinsicID) {
10564 default:
10565 llvm_unreachable("Unknown comparison intrinsic.");
10566 case Intrinsic::ppc_altivec_vcmpequq:
10567 CompareOpc = 455;
10568 break;
10569 case Intrinsic::ppc_altivec_vcmpgtsq:
10570 CompareOpc = 903;
10571 break;
10572 case Intrinsic::ppc_altivec_vcmpgtuq:
10573 CompareOpc = 647;
10574 break;
10575 }
10576 break;
10577
10578 // VSX predicate comparisons use the same infrastructure
10579 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10580 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10581 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10582 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10583 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10584 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10585 if (Subtarget.hasVSX()) {
10586 switch (IntrinsicID) {
10587 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10588 CompareOpc = 99;
10589 break;
10590 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10591 CompareOpc = 115;
10592 break;
10593 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10594 CompareOpc = 107;
10595 break;
10596 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10597 CompareOpc = 67;
10598 break;
10599 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10600 CompareOpc = 83;
10601 break;
10602 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10603 CompareOpc = 75;
10604 break;
10605 }
10606 isDot = true;
10607 } else
10608 return false;
10609 break;
10610
10611 // Normal Comparisons.
10612 case Intrinsic::ppc_altivec_vcmpbfp:
10613 CompareOpc = 966;
10614 break;
10615 case Intrinsic::ppc_altivec_vcmpeqfp:
10616 CompareOpc = 198;
10617 break;
10618 case Intrinsic::ppc_altivec_vcmpequb:
10619 CompareOpc = 6;
10620 break;
10621 case Intrinsic::ppc_altivec_vcmpequh:
10622 CompareOpc = 70;
10623 break;
10624 case Intrinsic::ppc_altivec_vcmpequw:
10625 CompareOpc = 134;
10626 break;
10627 case Intrinsic::ppc_altivec_vcmpequd:
10628 if (Subtarget.hasP8Altivec())
10629 CompareOpc = 199;
10630 else
10631 return false;
10632 break;
10633 case Intrinsic::ppc_altivec_vcmpneb:
10634 case Intrinsic::ppc_altivec_vcmpneh:
10635 case Intrinsic::ppc_altivec_vcmpnew:
10636 case Intrinsic::ppc_altivec_vcmpnezb:
10637 case Intrinsic::ppc_altivec_vcmpnezh:
10638 case Intrinsic::ppc_altivec_vcmpnezw:
10639 if (Subtarget.hasP9Altivec())
10640 switch (IntrinsicID) {
10641 default:
10642 llvm_unreachable("Unknown comparison intrinsic.");
10643 case Intrinsic::ppc_altivec_vcmpneb:
10644 CompareOpc = 7;
10645 break;
10646 case Intrinsic::ppc_altivec_vcmpneh:
10647 CompareOpc = 71;
10648 break;
10649 case Intrinsic::ppc_altivec_vcmpnew:
10650 CompareOpc = 135;
10651 break;
10652 case Intrinsic::ppc_altivec_vcmpnezb:
10653 CompareOpc = 263;
10654 break;
10655 case Intrinsic::ppc_altivec_vcmpnezh:
10656 CompareOpc = 327;
10657 break;
10658 case Intrinsic::ppc_altivec_vcmpnezw:
10659 CompareOpc = 391;
10660 break;
10661 }
10662 else
10663 return false;
10664 break;
10665 case Intrinsic::ppc_altivec_vcmpgefp:
10666 CompareOpc = 454;
10667 break;
10668 case Intrinsic::ppc_altivec_vcmpgtfp:
10669 CompareOpc = 710;
10670 break;
10671 case Intrinsic::ppc_altivec_vcmpgtsb:
10672 CompareOpc = 774;
10673 break;
10674 case Intrinsic::ppc_altivec_vcmpgtsh:
10675 CompareOpc = 838;
10676 break;
10677 case Intrinsic::ppc_altivec_vcmpgtsw:
10678 CompareOpc = 902;
10679 break;
10680 case Intrinsic::ppc_altivec_vcmpgtsd:
10681 if (Subtarget.hasP8Altivec())
10682 CompareOpc = 967;
10683 else
10684 return false;
10685 break;
10686 case Intrinsic::ppc_altivec_vcmpgtub:
10687 CompareOpc = 518;
10688 break;
10689 case Intrinsic::ppc_altivec_vcmpgtuh:
10690 CompareOpc = 582;
10691 break;
10692 case Intrinsic::ppc_altivec_vcmpgtuw:
10693 CompareOpc = 646;
10694 break;
10695 case Intrinsic::ppc_altivec_vcmpgtud:
10696 if (Subtarget.hasP8Altivec())
10697 CompareOpc = 711;
10698 else
10699 return false;
10700 break;
10701 case Intrinsic::ppc_altivec_vcmpequq_p:
10702 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10703 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10704 if (!Subtarget.isISA3_1())
10705 return false;
10706 switch (IntrinsicID) {
10707 default:
10708 llvm_unreachable("Unknown comparison intrinsic.");
10709 case Intrinsic::ppc_altivec_vcmpequq_p:
10710 CompareOpc = 455;
10711 break;
10712 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10713 CompareOpc = 903;
10714 break;
10715 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10716 CompareOpc = 647;
10717 break;
10718 }
10719 isDot = true;
10720 break;
10721 }
10722 return true;
10723}
10724
10725/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10726/// lower, do it, otherwise return null.
10727SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10728 SelectionDAG &DAG) const {
10729 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10730
10731 SDLoc dl(Op);
10732
10733 switch (IntrinsicID) {
10734 case Intrinsic::thread_pointer:
10735 // Reads the thread pointer register, used for __builtin_thread_pointer.
10736 if (Subtarget.isPPC64())
10737 return DAG.getRegister(PPC::X13, MVT::i64);
10738 return DAG.getRegister(PPC::R2, MVT::i32);
10739
10740 case Intrinsic::ppc_mma_disassemble_acc: {
10741 if (Subtarget.isISAFuture()) {
10742 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10743 SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
10744 ArrayRef(ReturnTypes, 2),
10745 Op.getOperand(1)),
10746                                    0);
10747       SmallVector<SDValue, 4> RetOps;
10748 SDValue Value = SDValue(WideVec.getNode(), 0);
10749 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10750
10751 SDValue Extract;
10752 Extract = DAG.getNode(
10753 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10754 Subtarget.isLittleEndian() ? Value2 : Value,
10755 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10756 dl, getPointerTy(DAG.getDataLayout())));
10757 RetOps.push_back(Extract);
10758 Extract = DAG.getNode(
10759 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10760 Subtarget.isLittleEndian() ? Value2 : Value,
10761 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10762 dl, getPointerTy(DAG.getDataLayout())));
10763 RetOps.push_back(Extract);
10764 Extract = DAG.getNode(
10765 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10766 Subtarget.isLittleEndian() ? Value : Value2,
10767 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10768 dl, getPointerTy(DAG.getDataLayout())));
10769 RetOps.push_back(Extract);
10770 Extract = DAG.getNode(
10771 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10772 Subtarget.isLittleEndian() ? Value : Value2,
10773 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10774 dl, getPointerTy(DAG.getDataLayout())));
10775 RetOps.push_back(Extract);
10776 return DAG.getMergeValues(RetOps, dl);
10777 }
10778 [[fallthrough]];
10779 }
10780 case Intrinsic::ppc_vsx_disassemble_pair: {
10781 int NumVecs = 2;
10782 SDValue WideVec = Op.getOperand(1);
10783 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10784 NumVecs = 4;
10785 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10786     }
10787     SmallVector<SDValue, 4> RetOps;
10788 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10789 SDValue Extract = DAG.getNode(
10790 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10791 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10792 : VecNo,
10793 dl, getPointerTy(DAG.getDataLayout())));
10794 RetOps.push_back(Extract);
10795 }
10796 return DAG.getMergeValues(RetOps, dl);
10797 }
10798
10799 case Intrinsic::ppc_mma_xxmfacc:
10800 case Intrinsic::ppc_mma_xxmtacc: {
10801 // Allow pre-isa-future subtargets to lower as normal.
10802 if (!Subtarget.isISAFuture())
10803 return SDValue();
10804     // The intrinsics for xxmtacc and xxmfacc take one argument of
10805     // type v512i1. For future CPUs the corresponding wacc instruction
10806     // dmxx[inst|extf]dmr512 is always generated for type v512i1,
10807     // eliminating the need to produce xxm[t|f]acc.
10808 SDValue WideVec = Op.getOperand(1);
10809 DAG.ReplaceAllUsesWith(Op, WideVec);
10810 return SDValue();
10811 }
10812
10813 case Intrinsic::ppc_unpack_longdouble: {
10814 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10815 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10816 "Argument of long double unpack must be 0 or 1!");
10817 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10818 DAG.getConstant(!!(Idx->getSExtValue()), dl,
10819 Idx->getValueType(0)));
10820 }
10821
10822 case Intrinsic::ppc_compare_exp_lt:
10823 case Intrinsic::ppc_compare_exp_gt:
10824 case Intrinsic::ppc_compare_exp_eq:
10825 case Intrinsic::ppc_compare_exp_uo: {
10826 unsigned Pred;
10827 switch (IntrinsicID) {
10828 case Intrinsic::ppc_compare_exp_lt:
10829 Pred = PPC::PRED_LT;
10830 break;
10831 case Intrinsic::ppc_compare_exp_gt:
10832 Pred = PPC::PRED_GT;
10833 break;
10834 case Intrinsic::ppc_compare_exp_eq:
10835 Pred = PPC::PRED_EQ;
10836 break;
10837 case Intrinsic::ppc_compare_exp_uo:
10838 Pred = PPC::PRED_UN;
10839 break;
10840 }
10841 return SDValue(
10842 DAG.getMachineNode(
10843 PPC::SELECT_CC_I4, dl, MVT::i32,
10844 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10845 Op.getOperand(1), Op.getOperand(2)),
10846 0),
10847 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10848 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10849 0);
10850 }
10851 case Intrinsic::ppc_test_data_class: {
10852 EVT OpVT = Op.getOperand(1).getValueType();
10853 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
10854 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
10855 : PPC::XSTSTDCSP);
10856 return SDValue(
10857 DAG.getMachineNode(
10858 PPC::SELECT_CC_I4, dl, MVT::i32,
10859 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10860 Op.getOperand(1)),
10861 0),
10862 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10863 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10864 0);
10865 }
10866 case Intrinsic::ppc_fnmsub: {
10867 EVT VT = Op.getOperand(1).getValueType();
10868 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10869 return DAG.getNode(
10870 ISD::FNEG, dl, VT,
10871 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10872 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
10873 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
10874 Op.getOperand(2), Op.getOperand(3));
10875 }
10876 case Intrinsic::ppc_convert_f128_to_ppcf128:
10877 case Intrinsic::ppc_convert_ppcf128_to_f128: {
10878 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10879 ? RTLIB::CONVERT_PPCF128_F128
10880 : RTLIB::CONVERT_F128_PPCF128;
10881 MakeLibCallOptions CallOptions;
10882 std::pair<SDValue, SDValue> Result =
10883 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
10884 dl, SDValue());
10885 return Result.first;
10886 }
10887 case Intrinsic::ppc_maxfe:
10888 case Intrinsic::ppc_maxfl:
10889 case Intrinsic::ppc_maxfs:
10890 case Intrinsic::ppc_minfe:
10891 case Intrinsic::ppc_minfl:
10892 case Intrinsic::ppc_minfs: {
10893 EVT VT = Op.getValueType();
10894 assert(
10895 all_of(Op->ops().drop_front(4),
10896 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
10897 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
10898     (void)VT;
10899     ISD::CondCode CC = ISD::SETGT;
10900 if (IntrinsicID == Intrinsic::ppc_minfe ||
10901 IntrinsicID == Intrinsic::ppc_minfl ||
10902 IntrinsicID == Intrinsic::ppc_minfs)
10903 CC = ISD::SETLT;
10904 unsigned I = Op.getNumOperands() - 2, Cnt = I;
10905 SDValue Res = Op.getOperand(I);
10906 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
10907 Res =
10908 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
10909 }
10910 return Res;
10911 }
10912 }
10913
10914 // If this is a lowered altivec predicate compare, CompareOpc is set to the
10915 // opcode number of the comparison.
10916 int CompareOpc;
10917 bool isDot;
10918 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10919 return SDValue(); // Don't custom lower most intrinsics.
10920
10921 // If this is a non-dot comparison, make the VCMP node and we are done.
10922 if (!isDot) {
10923 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10924 Op.getOperand(1), Op.getOperand(2),
10925 DAG.getConstant(CompareOpc, dl, MVT::i32));
10926 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10927 }
10928
10929 // Create the PPCISD altivec 'dot' comparison node.
10930 SDValue Ops[] = {
10931 Op.getOperand(2), // LHS
10932 Op.getOperand(3), // RHS
10933 DAG.getConstant(CompareOpc, dl, MVT::i32)
10934 };
10935 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10936 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10937
10938 // Now that we have the comparison, emit a copy from the CR to a GPR.
10939 // This is flagged to the above dot comparison.
10940 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10941 DAG.getRegister(PPC::CR6, MVT::i32),
10942 CompNode.getValue(1));
10943
10944 // Unpack the result based on how the target uses it.
10945 unsigned BitNo; // Bit # of CR6.
10946 bool InvertBit; // Invert result?
10947 switch (Op.getConstantOperandVal(1)) {
10948 default: // Can't happen, don't crash on invalid number though.
10949 case 0: // Return the value of the EQ bit of CR6.
10950 BitNo = 0; InvertBit = false;
10951 break;
10952 case 1: // Return the inverted value of the EQ bit of CR6.
10953 BitNo = 0; InvertBit = true;
10954 break;
10955 case 2: // Return the value of the LT bit of CR6.
10956 BitNo = 2; InvertBit = false;
10957 break;
10958 case 3: // Return the inverted value of the LT bit of CR6.
10959 BitNo = 2; InvertBit = true;
10960 break;
10961 }
10962
10963 // Shift the bit into the low position.
10964 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10965 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
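  // MFOCRF leaves CR6 in bits 7..4 of the GPR (LT, GT, EQ, SO at bits 7, 6,
  // 5, 4 counting from the LSB), so e.g. the EQ bit (BitNo == 0) is brought
  // to the bottom by a shift of 8 - (3 - 0) == 5.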
10966 // Isolate the bit.
10967 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10968 DAG.getConstant(1, dl, MVT::i32));
10969
10970 // If we are supposed to, toggle the bit.
10971 if (InvertBit)
10972 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10973 DAG.getConstant(1, dl, MVT::i32));
10974 return Flags;
10975}
10976
10977SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10978 SelectionDAG &DAG) const {
10979   // SelectionDAGBuilder::visitTargetIntrinsic may insert an extra chain
10980   // operand at the beginning of the argument list.
10981 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10982 SDLoc DL(Op);
10983 switch (Op.getConstantOperandVal(ArgStart)) {
10984 case Intrinsic::ppc_cfence: {
10985 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10986 SDValue Val = Op.getOperand(ArgStart + 1);
10987 EVT Ty = Val.getValueType();
10988 if (Ty == MVT::i128) {
10989 // FIXME: Testing one of two paired registers is sufficient to guarantee
10990 // ordering?
10991 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
10992 }
10993 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
10994 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
10995 return SDValue(
10996 DAG.getMachineNode(Opcode, DL, MVT::Other,
10997 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
10998 Op.getOperand(0)),
10999 0);
11000 }
11001 default:
11002 break;
11003 }
11004 return SDValue();
11005}
11006
11007// Lower scalar BSWAP64 to xxbrd.
11008SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11009 SDLoc dl(Op);
11010 if (!Subtarget.isPPC64())
11011 return Op;
11012 // MTVSRDD
11013 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11014 Op.getOperand(0));
11015 // XXBRD
11016 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11017 // MFVSRD
11018 int VectorIndex = 0;
11019 if (Subtarget.isLittleEndian())
11020 VectorIndex = 1;
11021 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11022 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11023 return Op;
11024}
11025
11026// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11027// compared to a value that is atomically loaded (atomic loads zero-extend).
11028SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11029 SelectionDAG &DAG) const {
11030 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11031 "Expecting an atomic compare-and-swap here.");
11032 SDLoc dl(Op);
11033 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11034 EVT MemVT = AtomicNode->getMemoryVT();
11035 if (MemVT.getSizeInBits() >= 32)
11036 return Op;
11037
11038 SDValue CmpOp = Op.getOperand(2);
11039 // If this is already correctly zero-extended, leave it alone.
11040 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11041 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11042 return Op;
11043
11044 // Clear the high bits of the compare operand.
11045 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11046 SDValue NewCmpOp =
11047 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11048 DAG.getConstant(MaskVal, dl, MVT::i32));
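  // E.g. an i8 compare value of 0x1A5 is masked with (1 << 8) - 1 == 0xFF,
  // leaving 0xA5 -- the same form a zero-extending atomic load produces.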
11049
11050   // Replace the existing compare operand with the properly zero-extended one.
11051   SmallVector<SDValue, 4> Ops;
11052 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11053 Ops.push_back(AtomicNode->getOperand(i));
11054 Ops[2] = NewCmpOp;
11055 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11056 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11057   auto NodeTy =
11058       (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11059 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11060}
11061
11062SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11063 SelectionDAG &DAG) const {
11064 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11065 EVT MemVT = N->getMemoryVT();
11066 assert(MemVT.getSimpleVT() == MVT::i128 &&
11067 "Expect quadword atomic operations");
11068 SDLoc dl(N);
11069 unsigned Opc = N->getOpcode();
11070 switch (Opc) {
11071 case ISD::ATOMIC_LOAD: {
11072 // Lower a quadword atomic load to int_ppc_atomic_load_i128, which will be
11073 // lowered to PPC instructions by the pattern-matching instruction selector.
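// The intrinsic yields the quadword as two i64 halves plus a chain; the code
// below recombines them as (zext(Hi) << 64) | zext(Lo).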
11074 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11075 SmallVector<SDValue, 4> Ops{
11076 N->getOperand(0),
11077 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11078 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11079 Ops.push_back(N->getOperand(I));
11080 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11081 Ops, MemVT, N->getMemOperand());
11082 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11083 SDValue ValHi =
11084 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11085 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11086 DAG.getConstant(64, dl, MVT::i32));
11087 SDValue Val =
11088 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11089 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11090 {Val, LoadedVal.getValue(2)});
11091 }
11092 case ISD::ATOMIC_STORE: {
11093 // Lower a quadword atomic store to int_ppc_atomic_store_i128, which will be
11094 // lowered to PPC instructions by the pattern-matching instruction selector.
11095 SDVTList Tys = DAG.getVTList(MVT::Other);
11096 SmallVector<SDValue, 4> Ops{
11097 N->getOperand(0),
11098 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11099 SDValue Val = N->getOperand(1);
11100 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11101 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11102 DAG.getConstant(64, dl, MVT::i32));
11103 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11104 Ops.push_back(ValLo);
11105 Ops.push_back(ValHi);
11106 Ops.push_back(N->getOperand(2));
11107 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11108 N->getMemOperand());
11109 }
11110 default:
11111 llvm_unreachable("Unexpected atomic opcode");
11112 }
11113}
11114
11115 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11116 SelectionDAG &DAG,
11117 const PPCSubtarget &Subtarget) {
11118 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11119
11120 enum DataClassMask {
11121 DC_NAN = 1 << 6,
11122 DC_NEG_INF = 1 << 4,
11123 DC_POS_INF = 1 << 5,
11124 DC_NEG_ZERO = 1 << 2,
11125 DC_POS_ZERO = 1 << 3,
11126 DC_NEG_SUBNORM = 1,
11127 DC_POS_SUBNORM = 1 << 1,
11128 };
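// These values mirror the 7-bit DCMX mask of the xststdcsp/xststdcdp/xststdcqp
// test-data-class instructions: one bit per recognized data class.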
11129
11130 EVT VT = Op.getValueType();
11131
11132 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11133 : VT == MVT::f64 ? PPC::XSTSTDCDP
11134 : PPC::XSTSTDCSP;
11135
11136 if (Mask == fcAllFlags)
11137 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11138 if (Mask == 0)
11139 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11140
11141 // Handle cases where it is cheaper or necessary to test the inverted flags.
11142 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11143 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11144 return DAG.getNOT(Dl, Rev, MVT::i1);
11145 }
11146
11147 // Power doesn't support testing whether a value is 'normal' directly. Test
11148 // every other class first, then check 'not not-normal' with the expected sign.
11149 if (Mask & fcNormal) {
11150 SDValue Rev(DAG.getMachineNode(
11151 TestOp, Dl, MVT::i32,
11152 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11153 DC_NEG_ZERO | DC_POS_ZERO |
11154 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11155 Dl, MVT::i32),
11156 Op),
11157 0);
11158 // The sign is stored in CR bit 0 and the result in CR bit 2.
11159 SDValue Sign(
11160 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11161 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11162 0);
11163 SDValue Normal(DAG.getNOT(
11164 Dl,
11165 SDValue(DAG.getMachineNode(
11166 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11167 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11168 0),
11169 MVT::i1));
11170 if (Mask & fcPosNormal)
11171 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11172 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11173 if (Mask == fcPosNormal || Mask == fcNegNormal)
11174 return Result;
11175
11176 return DAG.getNode(
11177 ISD::OR, Dl, MVT::i1,
11178 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11179 }
11180
11181 // The instruction doesn't differentiate between signaling and quiet NaNs.
11182 // Test the rest first, then test 'is NaN and is signaling/quiet'.
11183 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11184 bool IsQuiet = Mask & fcQNan;
11185 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11186
11187 // Quietness is determined by the most significant bit of the fraction field.
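// That is bit 22 of an f32 word (0x400000), bit 19 of the f64 high word
// (0x80000), and bit 15 of the f128 high word (0x8000), as masked below.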
11188 uint64_t QuietMask = 0;
11189 SDValue HighWord;
11190 if (VT == MVT::f128) {
11191 HighWord = DAG.getNode(
11192 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11193 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11194 QuietMask = 0x8000;
11195 } else if (VT == MVT::f64) {
11196 if (Subtarget.isPPC64()) {
11197 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11198 DAG.getBitcast(MVT::i64, Op),
11199 DAG.getConstant(1, Dl, MVT::i32));
11200 } else {
11201 SDValue Vec = DAG.getBitcast(
11202 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11203 HighWord = DAG.getNode(
11204 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11205 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11206 }
11207 QuietMask = 0x80000;
11208 } else if (VT == MVT::f32) {
11209 HighWord = DAG.getBitcast(MVT::i32, Op);
11210 QuietMask = 0x400000;
11211 }
11212 SDValue NanRes = DAG.getSetCC(
11213 Dl, MVT::i1,
11214 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11215 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11216 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11217 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11218 if (Mask == fcQNan || Mask == fcSNan)
11219 return NanRes;
11220
11221 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11222 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11223 NanRes);
11224 }
11225
11226 unsigned NativeMask = 0;
11227 if ((Mask & fcNan) == fcNan)
11228 NativeMask |= DC_NAN;
11229 if (Mask & fcNegInf)
11230 NativeMask |= DC_NEG_INF;
11231 if (Mask & fcPosInf)
11232 NativeMask |= DC_POS_INF;
11233 if (Mask & fcNegZero)
11234 NativeMask |= DC_NEG_ZERO;
11235 if (Mask & fcPosZero)
11236 NativeMask |= DC_POS_ZERO;
11237 if (Mask & fcNegSubnormal)
11238 NativeMask |= DC_NEG_SUBNORM;
11239 if (Mask & fcPosSubnormal)
11240 NativeMask |= DC_POS_SUBNORM;
11241 return SDValue(
11242 DAG.getMachineNode(
11243 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11244 SDValue(DAG.getMachineNode(
11245 TestOp, Dl, MVT::i32,
11246 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11247 0),
11248 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11249 0);
11250}
11251
11252SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11253 SelectionDAG &DAG) const {
11254 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11255 SDValue LHS = Op.getOperand(0);
11256 const auto *RHS = cast<ConstantSDNode>(Op.getOperand(1));
11257 SDLoc Dl(Op);
11258 FPClassTest Category = static_cast<FPClassTest>(RHS->getZExtValue());
11259 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11260}
11261
11262SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11263 SelectionDAG &DAG) const {
11264 SDLoc dl(Op);
11265 // Create a stack slot that is 16-byte aligned.
11266 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11267 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11268 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11269 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11270
11271 // Store the input value into Value#0 of the stack slot.
11272 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11273 MachinePointerInfo());
11274 // Load it out.
11275 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11276}
11277
11278SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11279 SelectionDAG &DAG) const {
11280 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11281 "Should only be called for ISD::INSERT_VECTOR_ELT");
11282
11283 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11284
11285 EVT VT = Op.getValueType();
11286 SDLoc dl(Op);
11287 SDValue V1 = Op.getOperand(0);
11288 SDValue V2 = Op.getOperand(1);
11289
11290 if (VT == MVT::v2f64 && C)
11291 return Op;
11292
11293 if (Subtarget.hasP9Vector()) {
11294 // An f32 load feeding into a v4f32 insert_vector_elt is handled this way
11295 // because on P10 it allows this specific insert_vector_elt load pattern to
11296 // utilize the refactored load and store infrastructure to exploit prefixed
11297 // loads.
11298 // On targets with inexpensive direct moves (Power9 and up), an
11299 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11300 // load, since a single-precision load would involve a conversion to double
11301 // precision on the load followed by another conversion back to single.
11302 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11303 (isa<LoadSDNode>(V2))) {
11304 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11305 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11306 SDValue InsVecElt =
11307 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11308 BitcastLoad, Op.getOperand(2));
11309 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11310 }
11311 }
11312
11313 if (Subtarget.isISA3_1()) {
11314 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11315 return SDValue();
11316 // On P10, we have legal lowering for constant and variable indices for
11317 // all vectors.
11318 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11319 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11320 return Op;
11321 }
11322
11323 // Before P10, we have legal lowering for constant indices but not for
11324 // variable ones.
11325 if (!C)
11326 return SDValue();
11327
11328 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
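// For example, inserting element 3 of a v8i16 gives InsertAtByte = 3 * 2 = 6
// on big-endian, adjusted to (16 - 2) - 6 = 8 on little-endian below.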
11329 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11330 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11331 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11332 unsigned InsertAtElement = C->getZExtValue();
11333 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11334 if (Subtarget.isLittleEndian()) {
11335 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11336 }
11337 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11338 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11339 }
11340 return Op;
11341}
11342
11343SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11344 SelectionDAG &DAG) const {
11345 SDLoc dl(Op);
11346 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11347 SDValue LoadChain = LN->getChain();
11348 SDValue BasePtr = LN->getBasePtr();
11349 EVT VT = Op.getValueType();
11350
11351 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11352 return Op;
11353
11354 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11355 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11356 // 2 or 4 vsx registers.
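// For example, a v512i1 (accumulator) load becomes four v16i8 loads at byte
// offsets 0, 16, 32 and 48, combined with an ACC_BUILD node (order reversed
// on little-endian).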
11357 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11358 "Type unsupported without MMA");
11359 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11360 "Type unsupported without paired vector support");
11361 Align Alignment = LN->getAlign();
11362 SmallVector<SDValue, 4> Loads;
11363 SmallVector<SDValue, 4> LoadChains;
11364 unsigned NumVecs = VT.getSizeInBits() / 128;
11365 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11366 SDValue Load =
11367 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11368 LN->getPointerInfo().getWithOffset(Idx * 16),
11369 commonAlignment(Alignment, Idx * 16),
11370 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11371 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11372 DAG.getConstant(16, dl, BasePtr.getValueType()));
11373 Loads.push_back(Load);
11374 LoadChains.push_back(Load.getValue(1));
11375 }
11376 if (Subtarget.isLittleEndian()) {
11377 std::reverse(Loads.begin(), Loads.end());
11378 std::reverse(LoadChains.begin(), LoadChains.end());
11379 }
11380 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11381 SDValue Value =
11382 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11383 dl, VT, Loads);
11384 SDValue RetOps[] = {Value, TF};
11385 return DAG.getMergeValues(RetOps, dl);
11386}
11387
11388SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11389 SelectionDAG &DAG) const {
11390 SDLoc dl(Op);
11391 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11392 SDValue StoreChain = SN->getChain();
11393 SDValue BasePtr = SN->getBasePtr();
11394 SDValue Value = SN->getValue();
11395 SDValue Value2 = SN->getValue();
11396 EVT StoreVT = Value.getValueType();
11397
11398 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11399 return Op;
11400
11401 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11402 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11403 // underlying registers individually.
11404 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11405 "Type unsupported without MMA");
11406 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11407 "Type unsupported without paired vector support");
11408 Align Alignment = SN->getAlign();
11409 SmallVector<SDValue, 4> Stores;
11410 unsigned NumVecs = 2;
11411 if (StoreVT == MVT::v512i1) {
11412 if (Subtarget.isISAFuture()) {
11413 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11414 MachineSDNode *ExtNode = DAG.getMachineNode(
11415 PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1));
11416
11417 Value = SDValue(ExtNode, 0);
11418 Value2 = SDValue(ExtNode, 1);
11419 } else
11420 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11421 NumVecs = 4;
11422 }
11423 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11424 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11425 SDValue Elt;
11426 if (Subtarget.isISAFuture()) {
11427 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11428 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11429 Idx > 1 ? Value2 : Value,
11430 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11431 } else
11432 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11433 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11434
11435 SDValue Store =
11436 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11437 SN->getPointerInfo().getWithOffset(Idx * 16),
11438 commonAlignment(Alignment, Idx * 16),
11439 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11440 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11441 DAG.getConstant(16, dl, BasePtr.getValueType()));
11442 Stores.push_back(Store);
11443 }
11444 SDValue TF = DAG.getTokenFactor(dl, Stores);
11445 return TF;
11446}
11447
11448SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11449 SDLoc dl(Op);
11450 if (Op.getValueType() == MVT::v4i32) {
11451 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11452
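// A sketch of the decomposition used here: per 32-bit lane, with
// a = a_hi * 2^16 + a_lo and b = b_hi * 2^16 + b_lo,
//   a * b mod 2^32 = a_lo * b_lo + ((a_lo * b_hi + a_hi * b_lo) << 16),
// where vmulouh produces the a_lo * b_lo terms and vmsumuhm (on the rotated
// RHS) accumulates the two cross terms.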
11453 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11454 // -16 wraps to +16 as the shift amount (only the low 5 bits are used).
11455 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11456 SDValue RHSSwap = // = vrlw RHS, 16
11457 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11458
11459 // Shrinkify inputs to v8i16.
11460 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11461 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11462 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11463
11464 // Low parts multiplied together, generating 32-bit results (we ignore the
11465 // top parts).
11466 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11467 LHS, RHS, DAG, dl, MVT::v4i32);
11468
11469 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11470 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11471 // Shift the high parts up 16 bits.
11472 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11473 Neg16, DAG, dl);
11474 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11475 } else if (Op.getValueType() == MVT::v16i8) {
11476 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11477 bool isLittleEndian = Subtarget.isLittleEndian();
11478
11479 // Multiply the even 8-bit parts, producing 16-bit sums.
11480 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11481 LHS, RHS, DAG, dl, MVT::v8i16);
11482 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11483
11484 // Multiply the odd 8-bit parts, producing 16-bit sums.
11485 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11486 LHS, RHS, DAG, dl, MVT::v8i16);
11487 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11488
11489 // Merge the results together. Because vmuleub and vmuloub are
11490 // instructions with a big-endian bias, we must reverse the
11491 // element numbering and reverse the meaning of "odd" and "even"
11492 // when generating little endian code.
11493 int Ops[16];
11494 for (unsigned i = 0; i != 8; ++i) {
11495 if (isLittleEndian) {
11496 Ops[i*2 ] = 2*i;
11497 Ops[i*2+1] = 2*i+16;
11498 } else {
11499 Ops[i*2 ] = 2*i+1;
11500 Ops[i*2+1] = 2*i+1+16;
11501 }
11502 }
11503 if (isLittleEndian)
11504 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11505 else
11506 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11507 } else {
11508 llvm_unreachable("Unknown mul to lower!");
11509 }
11510}
11511
11512SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11513 bool IsStrict = Op->isStrictFPOpcode();
11514 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11515 !Subtarget.hasP9Vector())
11516 return SDValue();
11517
11518 return Op;
11519}
11520
11521 // Custom lowering for fpext v2f32 to v2f64
11522SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11523
11524 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11525 "Should only be called for ISD::FP_EXTEND");
11526
11527 // FIXME: handle extends from half precision float vectors on P9.
11528 // We only want to custom lower an extend from v2f32 to v2f64.
11529 if (Op.getValueType() != MVT::v2f64 ||
11530 Op.getOperand(0).getValueType() != MVT::v2f32)
11531 return SDValue();
11532
11533 SDLoc dl(Op);
11534 SDValue Op0 = Op.getOperand(0);
11535
11536 switch (Op0.getOpcode()) {
11537 default:
11538 return SDValue();
11539 case ISD::EXTRACT_SUBVECTOR: {
11540 assert(Op0.getNumOperands() == 2 &&
11541 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11542 "Node should have 2 operands with second one being a constant!");
11543
11544 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11545 return SDValue();
11546
11547 // Custom lowering is only done for the high or low doubleword.
11548 int Idx = Op0.getConstantOperandVal(1);
11549 if (Idx % 2 != 0)
11550 return SDValue();
11551
11552 // Since input is v4f32, at this point Idx is either 0 or 2.
11553 // Shift to get the doubleword position we want.
11554 int DWord = Idx >> 1;
11555
11556 // High and low word positions are different on little endian.
11557 if (Subtarget.isLittleEndian())
11558 DWord ^= 0x1;
11559
11560 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11561 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11562 }
11563 case ISD::FADD:
11564 case ISD::FMUL:
11565 case ISD::FSUB: {
11566 SDValue NewLoad[2];
11567 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11568 // Ensure both inputs are loads.
11569 SDValue LdOp = Op0.getOperand(i);
11570 if (LdOp.getOpcode() != ISD::LOAD)
11571 return SDValue();
11572 // Generate new load node.
11573 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11574 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11575 NewLoad[i] = DAG.getMemIntrinsicNode(
11576 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11577 LD->getMemoryVT(), LD->getMemOperand());
11578 }
11579 SDValue NewOp =
11580 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11581 NewLoad[1], Op0.getNode()->getFlags());
11582 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11583 DAG.getConstant(0, dl, MVT::i32));
11584 }
11585 case ISD::LOAD: {
11586 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11587 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11588 SDValue NewLd = DAG.getMemIntrinsicNode(
11589 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11590 LD->getMemoryVT(), LD->getMemOperand());
11591 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11592 DAG.getConstant(0, dl, MVT::i32));
11593 }
11594 }
11595 llvm_unreachable("ERROR:Should return for all cases within swtich.");
11596}
11597
11598/// LowerOperation - Provide custom lowering hooks for some operations.
11599///
11600 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11601 switch (Op.getOpcode()) {
11602 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11603 case ISD::FPOW: return lowerPow(Op, DAG);
11604 case ISD::FSIN: return lowerSin(Op, DAG);
11605 case ISD::FCOS: return lowerCos(Op, DAG);
11606 case ISD::FLOG: return lowerLog(Op, DAG);
11607 case ISD::FLOG10: return lowerLog10(Op, DAG);
11608 case ISD::FEXP: return lowerExp(Op, DAG);
11609 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11610 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11611 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11612 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11613 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11614 case ISD::STRICT_FSETCC:
11615 case ISD::STRICT_FSETCCS:
11616 case ISD::SETCC: return LowerSETCC(Op, DAG);
11617 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11618 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11619
11620 case ISD::INLINEASM:
11621 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11622 // Variable argument lowering.
11623 case ISD::VASTART: return LowerVASTART(Op, DAG);
11624 case ISD::VAARG: return LowerVAARG(Op, DAG);
11625 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11626
11627 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11628 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11630 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11631
11632 // Exception handling lowering.
11633 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11634 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11635 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11636
11637 case ISD::LOAD: return LowerLOAD(Op, DAG);
11638 case ISD::STORE: return LowerSTORE(Op, DAG);
11639 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11640 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11641 case ISD::STRICT_FP_TO_UINT:
11642 case ISD::STRICT_FP_TO_SINT:
11643 case ISD::FP_TO_UINT:
11644 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11645 case ISD::STRICT_UINT_TO_FP:
11646 case ISD::STRICT_SINT_TO_FP:
11647 case ISD::UINT_TO_FP:
11648 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11649 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11650
11651 // Lower 64-bit shifts.
11652 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11653 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11654 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11655
11656 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11657 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11658
11659 // Vector-related lowering.
11660 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11661 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11662 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11663 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11664 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11665 case ISD::MUL: return LowerMUL(Op, DAG);
11666 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11667 case ISD::STRICT_FP_ROUND:
11668 case ISD::FP_ROUND:
11669 return LowerFP_ROUND(Op, DAG);
11670 case ISD::ROTL: return LowerROTL(Op, DAG);
11671
11672 // For counter-based loop handling.
11673 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11674
11675 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11676
11677 // Frame & Return address.
11678 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11679 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11680
11681 case ISD::INTRINSIC_VOID:
11682 return LowerINTRINSIC_VOID(Op, DAG);
11683 case ISD::BSWAP:
11684 return LowerBSWAP(Op, DAG);
11685 case ISD::ATOMIC_CMP_SWAP:
11686 return LowerATOMIC_CMP_SWAP(Op, DAG);
11687 case ISD::ATOMIC_STORE:
11688 return LowerATOMIC_LOAD_STORE(Op, DAG);
11689 case ISD::IS_FPCLASS:
11690 return LowerIS_FPCLASS(Op, DAG);
11691 }
11692}
11693
11694 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11695 SmallVectorImpl<SDValue> &Results,
11696 SelectionDAG &DAG) const {
11697 SDLoc dl(N);
11698 switch (N->getOpcode()) {
11699 default:
11700 llvm_unreachable("Do not know how to custom type legalize this operation!");
11701 case ISD::ATOMIC_LOAD: {
11702 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11703 Results.push_back(Res);
11704 Results.push_back(Res.getValue(1));
11705 break;
11706 }
11707 case ISD::READCYCLECOUNTER: {
11708 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11709 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11710
11711 Results.push_back(
11712 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11713 Results.push_back(RTB.getValue(2));
11714 break;
11715 }
11716 case ISD::INTRINSIC_W_CHAIN: {
11717 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11718 break;
11719
11720 assert(N->getValueType(0) == MVT::i1 &&
11721 "Unexpected result type for CTR decrement intrinsic");
11722 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11723 N->getValueType(0));
11724 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11725 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11726 N->getOperand(1));
11727
11728 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11729 Results.push_back(NewInt.getValue(1));
11730 break;
11731 }
11732 case ISD::INTRINSIC_WO_CHAIN: {
11733 switch (N->getConstantOperandVal(0)) {
11734 case Intrinsic::ppc_pack_longdouble:
11735 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11736 N->getOperand(2), N->getOperand(1)));
11737 break;
11738 case Intrinsic::ppc_maxfe:
11739 case Intrinsic::ppc_minfe:
11740 case Intrinsic::ppc_fnmsub:
11741 case Intrinsic::ppc_convert_f128_to_ppcf128:
11742 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11743 break;
11744 }
11745 break;
11746 }
11747 case ISD::VAARG: {
11748 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11749 return;
11750
11751 EVT VT = N->getValueType(0);
11752
11753 if (VT == MVT::i64) {
11754 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11755
11756 Results.push_back(NewNode);
11757 Results.push_back(NewNode.getValue(1));
11758 }
11759 return;
11760 }
11761 case ISD::STRICT_FP_TO_SINT:
11762 case ISD::STRICT_FP_TO_UINT:
11763 case ISD::FP_TO_SINT:
11764 case ISD::FP_TO_UINT: {
11765 // LowerFP_TO_INT() can only handle f32 and f64.
11766 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11767 MVT::ppcf128)
11768 return;
11769 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11770 Results.push_back(LoweredValue);
11771 if (N->isStrictFPOpcode())
11772 Results.push_back(LoweredValue.getValue(1));
11773 return;
11774 }
11775 case ISD::TRUNCATE: {
11776 if (!N->getValueType(0).isVector())
11777 return;
11778 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11779 if (Lowered)
11780 Results.push_back(Lowered);
11781 return;
11782 }
11783 case ISD::FSHL:
11784 case ISD::FSHR:
11785 // Don't handle funnel shifts here.
11786 return;
11787 case ISD::BITCAST:
11788 // Don't handle bitcast here.
11789 return;
11790 case ISD::FP_EXTEND:
11791 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11792 if (Lowered)
11793 Results.push_back(Lowered);
11794 return;
11795 }
11796}
11797
11798//===----------------------------------------------------------------------===//
11799// Other Lowering Code
11800//===----------------------------------------------------------------------===//
11801
11802 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11803 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11804 Function *Func = Intrinsic::getDeclaration(M, Id);
11805 return Builder.CreateCall(Func, {});
11806}
11807
11808 // The mappings for emitLeading/TrailingFence are taken from
11809 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11810 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11811 Instruction *Inst,
11812 AtomicOrdering Ord) const {
11813 if (Ord == AtomicOrdering::SequentiallyConsistent)
11814 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11815 if (isReleaseOrStronger(Ord))
11816 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11817 return nullptr;
11818}
11819
11820 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11821 Instruction *Inst,
11822 AtomicOrdering Ord) const {
11823 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11824 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11825 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11826 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11827 if (isa<LoadInst>(Inst))
11828 return Builder.CreateCall(
11829 Intrinsic::getDeclaration(
11830 Builder.GetInsertBlock()->getParent()->getParent(),
11831 Intrinsic::ppc_cfence, {Inst->getType()}),
11832 {Inst});
11833 // FIXME: Could use isync for RMW operations.
11834 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11835 }
11836 return nullptr;
11837}
11838
11839 MachineBasicBlock *
11840 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11841 unsigned AtomicSize,
11842 unsigned BinOpcode,
11843 unsigned CmpOpcode,
11844 unsigned CmpPred) const {
11845 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11846 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11847
11848 auto LoadMnemonic = PPC::LDARX;
11849 auto StoreMnemonic = PPC::STDCX;
11850 switch (AtomicSize) {
11851 default:
11852 llvm_unreachable("Unexpected size of atomic entity");
11853 case 1:
11854 LoadMnemonic = PPC::LBARX;
11855 StoreMnemonic = PPC::STBCX;
11856 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11857 break;
11858 case 2:
11859 LoadMnemonic = PPC::LHARX;
11860 StoreMnemonic = PPC::STHCX;
11861 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11862 break;
11863 case 4:
11864 LoadMnemonic = PPC::LWARX;
11865 StoreMnemonic = PPC::STWCX;
11866 break;
11867 case 8:
11868 LoadMnemonic = PPC::LDARX;
11869 StoreMnemonic = PPC::STDCX;
11870 break;
11871 }
11872
11873 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11874 MachineFunction *F = BB->getParent();
11875 MachineFunction::iterator It = ++BB->getIterator();
11876
11877 Register dest = MI.getOperand(0).getReg();
11878 Register ptrA = MI.getOperand(1).getReg();
11879 Register ptrB = MI.getOperand(2).getReg();
11880 Register incr = MI.getOperand(3).getReg();
11881 DebugLoc dl = MI.getDebugLoc();
11882
11883 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11884 MachineBasicBlock *loop2MBB =
11885 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11886 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11887 F->insert(It, loopMBB);
11888 if (CmpOpcode)
11889 F->insert(It, loop2MBB);
11890 F->insert(It, exitMBB);
11891 exitMBB->splice(exitMBB->begin(), BB,
11892 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11893 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11894
11895 MachineRegisterInfo &RegInfo = F->getRegInfo();
11896 Register TmpReg = (!BinOpcode) ? incr :
11897 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11898 : &PPC::GPRCRegClass);
11899
11900 // thisMBB:
11901 // ...
11902 // fallthrough --> loopMBB
11903 BB->addSuccessor(loopMBB);
11904
11905 // loopMBB:
11906 // l[wd]arx dest, ptr
11907 // add r0, dest, incr
11908 // st[wd]cx. r0, ptr
11909 // bne- loopMBB
11910 // fallthrough --> exitMBB
11911
11912 // For max/min...
11913 // loopMBB:
11914 // l[wd]arx dest, ptr
11915 // cmpl?[wd] dest, incr
11916 // bgt exitMBB
11917 // loop2MBB:
11918 // st[wd]cx. dest, ptr
11919 // bne- loopMBB
11920 // fallthrough --> exitMBB
11921
11922 BB = loopMBB;
11923 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11924 .addReg(ptrA).addReg(ptrB);
11925 if (BinOpcode)
11926 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11927 if (CmpOpcode) {
11928 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11929 // Signed comparisons of byte or halfword values must be sign-extended.
11930 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11931 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11932 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11933 ExtReg).addReg(dest);
11934 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
11935 } else
11936 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
11937
11938 BuildMI(BB, dl, TII->get(PPC::BCC))
11939 .addImm(CmpPred)
11940 .addReg(CrReg)
11941 .addMBB(exitMBB);
11942 BB->addSuccessor(loop2MBB);
11943 BB->addSuccessor(exitMBB);
11944 BB = loop2MBB;
11945 }
11946 BuildMI(BB, dl, TII->get(StoreMnemonic))
11947 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11948 BuildMI(BB, dl, TII->get(PPC::BCC))
11949 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11950 BB->addSuccessor(loopMBB);
11951 BB->addSuccessor(exitMBB);
11952
11953 // exitMBB:
11954 // ...
11955 BB = exitMBB;
11956 return BB;
11957}
11958
11959 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
11960 switch(MI.getOpcode()) {
11961 default:
11962 return false;
11963 case PPC::COPY:
11964 return TII->isSignExtended(MI.getOperand(1).getReg(),
11965 &MI.getMF()->getRegInfo());
11966 case PPC::LHA:
11967 case PPC::LHA8:
11968 case PPC::LHAU:
11969 case PPC::LHAU8:
11970 case PPC::LHAUX:
11971 case PPC::LHAUX8:
11972 case PPC::LHAX:
11973 case PPC::LHAX8:
11974 case PPC::LWA:
11975 case PPC::LWAUX:
11976 case PPC::LWAX:
11977 case PPC::LWAX_32:
11978 case PPC::LWA_32:
11979 case PPC::PLHA:
11980 case PPC::PLHA8:
11981 case PPC::PLHA8pc:
11982 case PPC::PLHApc:
11983 case PPC::PLWA:
11984 case PPC::PLWA8:
11985 case PPC::PLWA8pc:
11986 case PPC::PLWApc:
11987 case PPC::EXTSB:
11988 case PPC::EXTSB8:
11989 case PPC::EXTSB8_32_64:
11990 case PPC::EXTSB8_rec:
11991 case PPC::EXTSB_rec:
11992 case PPC::EXTSH:
11993 case PPC::EXTSH8:
11994 case PPC::EXTSH8_32_64:
11995 case PPC::EXTSH8_rec:
11996 case PPC::EXTSH_rec:
11997 case PPC::EXTSW:
11998 case PPC::EXTSWSLI:
11999 case PPC::EXTSWSLI_32_64:
12000 case PPC::EXTSWSLI_32_64_rec:
12001 case PPC::EXTSWSLI_rec:
12002 case PPC::EXTSW_32:
12003 case PPC::EXTSW_32_64:
12004 case PPC::EXTSW_32_64_rec:
12005 case PPC::EXTSW_rec:
12006 case PPC::SRAW:
12007 case PPC::SRAWI:
12008 case PPC::SRAWI_rec:
12009 case PPC::SRAW_rec:
12010 return true;
12011 }
12012 return false;
12013}
12014
12015 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12016 MachineInstr &MI, MachineBasicBlock *BB,
12017 bool is8bit, // operation
12018 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12019 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12020 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12021
12022 // If this is a signed comparison and the value being compared is not known
12023 // to be sign extended, sign extend it here.
12024 DebugLoc dl = MI.getDebugLoc();
12025 MachineFunction *F = BB->getParent();
12026 MachineRegisterInfo &RegInfo = F->getRegInfo();
12027 Register incr = MI.getOperand(3).getReg();
12028 bool IsSignExtended =
12029 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12030
12031 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12032 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12033 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12034 .addReg(MI.getOperand(3).getReg());
12035 MI.getOperand(3).setReg(ValueReg);
12036 incr = ValueReg;
12037 }
12038 // If we support part-word atomic mnemonics, just use them.
12039 if (Subtarget.hasPartwordAtomics())
12040 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12041 CmpPred);
12042
12043 // In 64-bit mode we have to use 64 bits for addresses, even though
12044 // lwarx/stwcx operate on 32 bits. With the 32-bit atomics we can use address
12045 // registers without caring whether they're 32 or 64 bits wide, but here we're
12046 // doing actual arithmetic on the addresses.
12047 bool is64bit = Subtarget.isPPC64();
12048 bool isLittleEndian = Subtarget.isLittleEndian();
12049 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12050
12051 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12052 MachineFunction::iterator It = ++BB->getIterator();
12053
12054 Register dest = MI.getOperand(0).getReg();
12055 Register ptrA = MI.getOperand(1).getReg();
12056 Register ptrB = MI.getOperand(2).getReg();
12057
12058 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12059 MachineBasicBlock *loop2MBB =
12060 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12061 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12062 F->insert(It, loopMBB);
12063 if (CmpOpcode)
12064 F->insert(It, loop2MBB);
12065 F->insert(It, exitMBB);
12066 exitMBB->splice(exitMBB->begin(), BB,
12067 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12068 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12069
12070 const TargetRegisterClass *RC =
12071 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12072 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12073
12074 Register PtrReg = RegInfo.createVirtualRegister(RC);
12075 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12076 Register ShiftReg =
12077 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12078 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12079 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12080 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12081 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12082 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12083 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12084 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12085 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12086 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12087 Register Ptr1Reg;
12088 Register TmpReg =
12089 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12090
12091 // thisMBB:
12092 // ...
12093 // fallthrough --> loopMBB
12094 BB->addSuccessor(loopMBB);
12095
12096 // The 4-byte load must be aligned, while a char or short may be
12097 // anywhere in the word. Hence all this nasty bookkeeping code.
12098 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12099 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12100 // xori shift, shift1, 24 [16]
12101 // rlwinm ptr, ptr1, 0, 0, 29
12102 // slw incr2, incr, shift
12103 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12104 // slw mask, mask2, shift
12105 // loopMBB:
12106 // lwarx tmpDest, ptr
12107 // add tmp, tmpDest, incr2
12108 // andc tmp2, tmpDest, mask
12109 // and tmp3, tmp, mask
12110 // or tmp4, tmp3, tmp2
12111 // stwcx. tmp4, ptr
12112 // bne- loopMBB
12113 // fallthrough --> exitMBB
12114 // srw SrwDest, tmpDest, shift
12115 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
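// A worked example (big-endian, byte at offset 2 within its word):
// shift1 = (2 << 3) = 16, shift = 16 ^ 24 = 8, so the byte is operated on at
// bits 8-15 of the loaded word.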
12116 if (ptrA != ZeroReg) {
12117 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12118 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12119 .addReg(ptrA)
12120 .addReg(ptrB);
12121 } else {
12122 Ptr1Reg = ptrB;
12123 }
12124 // We need to use a 32-bit subregister to avoid a register class mismatch in
12125 // 64-bit mode.
12126 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12127 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12128 .addImm(3)
12129 .addImm(27)
12130 .addImm(is8bit ? 28 : 27);
12131 if (!isLittleEndian)
12132 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12133 .addReg(Shift1Reg)
12134 .addImm(is8bit ? 24 : 16);
12135 if (is64bit)
12136 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12137 .addReg(Ptr1Reg)
12138 .addImm(0)
12139 .addImm(61);
12140 else
12141 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12142 .addReg(Ptr1Reg)
12143 .addImm(0)
12144 .addImm(0)
12145 .addImm(29);
12146 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12147 if (is8bit)
12148 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12149 else {
12150 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12151 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12152 .addReg(Mask3Reg)
12153 .addImm(65535);
12154 }
12155 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12156 .addReg(Mask2Reg)
12157 .addReg(ShiftReg);
12158
12159 BB = loopMBB;
12160 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12161 .addReg(ZeroReg)
12162 .addReg(PtrReg);
12163 if (BinOpcode)
12164 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12165 .addReg(Incr2Reg)
12166 .addReg(TmpDestReg);
12167 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12168 .addReg(TmpDestReg)
12169 .addReg(MaskReg);
12170 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12171 if (CmpOpcode) {
12172 // For unsigned comparisons, we can directly compare the shifted values.
12173 // For signed comparisons we shift and sign extend.
12174 Register SReg = RegInfo.createVirtualRegister(GPRC);
12175 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12176 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12177 .addReg(TmpDestReg)
12178 .addReg(MaskReg);
12179 unsigned ValueReg = SReg;
12180 unsigned CmpReg = Incr2Reg;
12181 if (CmpOpcode == PPC::CMPW) {
12182 ValueReg = RegInfo.createVirtualRegister(GPRC);
12183 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12184 .addReg(SReg)
12185 .addReg(ShiftReg);
12186 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12187 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12188 .addReg(ValueReg);
12189 ValueReg = ValueSReg;
12190 CmpReg = incr;
12191 }
12192 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12193 BuildMI(BB, dl, TII->get(PPC::BCC))
12194 .addImm(CmpPred)
12195 .addReg(CrReg)
12196 .addMBB(exitMBB);
12197 BB->addSuccessor(loop2MBB);
12198 BB->addSuccessor(exitMBB);
12199 BB = loop2MBB;
12200 }
12201 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12202 BuildMI(BB, dl, TII->get(PPC::STWCX))
12203 .addReg(Tmp4Reg)
12204 .addReg(ZeroReg)
12205 .addReg(PtrReg);
12206 BuildMI(BB, dl, TII->get(PPC::BCC))
12207 .addImm(PPC::PRED_NE)
12208 .addReg(PPC::CR0)
12209 .addMBB(loopMBB);
12210 BB->addSuccessor(loopMBB);
12211 BB->addSuccessor(exitMBB);
12212
12213 // exitMBB:
12214 // ...
12215 BB = exitMBB;
12216 // Since the shift amount is not a constant, we need to clear
12217 // the upper bits with a separate RLWINM.
12218 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12219 .addReg(SrwDestReg)
12220 .addImm(0)
12221 .addImm(is8bit ? 24 : 16)
12222 .addImm(31);
12223 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12224 .addReg(TmpDestReg)
12225 .addReg(ShiftReg);
12226 return BB;
12227}
12228
12229 MachineBasicBlock *
12230 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12231 MachineBasicBlock *MBB) const {
12232 DebugLoc DL = MI.getDebugLoc();
12233 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12234 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12235
12236 MachineFunction *MF = MBB->getParent();
12237 MachineRegisterInfo &MRI = MF->getRegInfo();
12238
12239 const BasicBlock *BB = MBB->getBasicBlock();
12240 MachineFunction::iterator I = ++MBB->getIterator();
12241
12242 Register DstReg = MI.getOperand(0).getReg();
12243 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12244 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12245 Register mainDstReg = MRI.createVirtualRegister(RC);
12246 Register restoreDstReg = MRI.createVirtualRegister(RC);
12247
12248 MVT PVT = getPointerTy(MF->getDataLayout());
12249 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12250 "Invalid Pointer Size!");
12251 // For v = setjmp(buf), we generate
12252 //
12253 // thisMBB:
12254 // SjLjSetup mainMBB
12255 // bl mainMBB
12256 // v_restore = 1
12257 // b sinkMBB
12258 //
12259 // mainMBB:
12260 // buf[LabelOffset] = LR
12261 // v_main = 0
12262 //
12263 // sinkMBB:
12264 // v = phi(main, restore)
12265 //
12266
12267 MachineBasicBlock *thisMBB = MBB;
12268 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12269 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12270 MF->insert(I, mainMBB);
12271 MF->insert(I, sinkMBB);
12272
12273 MachineInstrBuilder MIB;
12274
12275 // Transfer the remainder of BB and its successor edges to sinkMBB.
12276 sinkMBB->splice(sinkMBB->begin(), MBB,
12277 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12278 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12279
12280 // Note that the structure of the jmp_buf used here is not compatible
12281 // with that used by libc, and is not designed to be. Specifically, it
12282 // stores only those 'reserved' registers that LLVM does not otherwise
12283 // understand how to spill. Also, by convention, by the time this
12284 // intrinsic is called, Clang has already stored the frame address in the
12285 // first slot of the buffer and stack address in the third. Following the
12286 // X86 target code, we'll store the jump address in the second slot. We also
12287 // need to save the TOC pointer (R2) to handle jumps between shared
12288 // libraries, and that will be stored in the fourth slot. The thread
12289 // identifier (R13) is not affected.
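// Slot layout assumed here (slot size = PVT.getStoreSize()):
//   0: frame address (Clang)    1: jump address (LR)
//   2: stack address (Clang)    3: TOC pointer (R2)
//   4: base pointer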
12290
12291 // thisMBB:
12292 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12293 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12294 const int64_t BPOffset = 4 * PVT.getStoreSize();
12295
12296 // Prepare the IP in a register.
12297 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12298 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12299 Register BufReg = MI.getOperand(1).getReg();
12300
12301 if (Subtarget.is64BitELFABI()) {
12302 setUsesTOCBasePtr(*MBB->getParent());
12303 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12304 .addReg(PPC::X2)
12305 .addImm(TOCOffset)
12306 .addReg(BufReg)
12307 .cloneMemRefs(MI);
12308 }
12309
12310 // Naked functions never have a base pointer, and so we use r1. For all
12311 // other functions, this decision must be delayed until PEI.
12312 unsigned BaseReg;
12313 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12314 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12315 else
12316 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12317
12318 MIB = BuildMI(*thisMBB, MI, DL,
12319 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12320 .addReg(BaseReg)
12321 .addImm(BPOffset)
12322 .addReg(BufReg)
12323 .cloneMemRefs(MI);
12324
12325 // Setup
12326 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12327 MIB.addRegMask(TRI->getNoPreservedMask());
12328
12329 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12330
12331 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12332 .addMBB(mainMBB);
12333 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12334
12335 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12336 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12337
12338 // mainMBB:
12339 // mainDstReg = 0
12340 MIB =
12341 BuildMI(mainMBB, DL,
12342 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12343
12344 // Store IP
12345 if (Subtarget.isPPC64()) {
12346 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12347 .addReg(LabelReg)
12348 .addImm(LabelOffset)
12349 .addReg(BufReg);
12350 } else {
12351 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12352 .addReg(LabelReg)
12353 .addImm(LabelOffset)
12354 .addReg(BufReg);
12355 }
12356 MIB.cloneMemRefs(MI);
12357
12358 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12359 mainMBB->addSuccessor(sinkMBB);
12360
12361 // sinkMBB:
12362 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12363 TII->get(PPC::PHI), DstReg)
12364 .addReg(mainDstReg).addMBB(mainMBB)
12365 .addReg(restoreDstReg).addMBB(thisMBB);
12366
12367 MI.eraseFromParent();
12368 return sinkMBB;
12369}
12370
12371 MachineBasicBlock *
12372 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12373 MachineBasicBlock *MBB) const {
12374 DebugLoc DL = MI.getDebugLoc();
12375 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12376
12377 MachineFunction *MF = MBB->getParent();
12378 MachineRegisterInfo &MRI = MF->getRegInfo();
12379
12380 MVT PVT = getPointerTy(MF->getDataLayout());
12381 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12382 "Invalid Pointer Size!");
12383
12384 const TargetRegisterClass *RC =
12385 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12386 Register Tmp = MRI.createVirtualRegister(RC);
12387 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12388 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12389 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12390 unsigned BP =
12391 (PVT == MVT::i64)
12392 ? PPC::X30
12393 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12394 : PPC::R30);
12395
12396 MachineInstrBuilder MIB;
12397
12398 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12399 const int64_t SPOffset = 2 * PVT.getStoreSize();
12400 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12401 const int64_t BPOffset = 4 * PVT.getStoreSize();
12402
12403 Register BufReg = MI.getOperand(0).getReg();
12404
12405 // Reload FP (the jumped-to function may not have had a
12406 // frame pointer, and if so, then its r31 will be restored
12407 // as necessary).
12408 if (PVT == MVT::i64) {
12409 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12410 .addImm(0)
12411 .addReg(BufReg);
12412 } else {
12413 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12414 .addImm(0)
12415 .addReg(BufReg);
12416 }
12417 MIB.cloneMemRefs(MI);
12418
12419 // Reload IP
12420 if (PVT == MVT::i64) {
12421 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12422 .addImm(LabelOffset)
12423 .addReg(BufReg);
12424 } else {
12425 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12426 .addImm(LabelOffset)
12427 .addReg(BufReg);
12428 }
12429 MIB.cloneMemRefs(MI);
12430
12431 // Reload SP
12432 if (PVT == MVT::i64) {
12433 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12434 .addImm(SPOffset)
12435 .addReg(BufReg);
12436 } else {
12437 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12438 .addImm(SPOffset)
12439 .addReg(BufReg);
12440 }
12441 MIB.cloneMemRefs(MI);
12442
12443 // Reload BP
12444 if (PVT == MVT::i64) {
12445 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12446 .addImm(BPOffset)
12447 .addReg(BufReg);
12448 } else {
12449 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12450 .addImm(BPOffset)
12451 .addReg(BufReg);
12452 }
12453 MIB.cloneMemRefs(MI);
12454
12455 // Reload TOC
12456 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12457 setUsesTOCBasePtr(*MBB->getParent());
12458 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12459 .addImm(TOCOffset)
12460 .addReg(BufReg)
12461 .cloneMemRefs(MI);
12462 }
12463
12464 // Jump
12465 BuildMI(*MBB, MI, DL,
12466 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12467 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12468
12469 MI.eraseFromParent();
12470 return MBB;
12471}
12472
12473 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12474 // If the function specifically requests inline stack probes, emit them.
12475 if (MF.getFunction().hasFnAttribute("probe-stack"))
12476 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12477 "inline-asm";
12478 return false;
12479}
12480
12481 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12482 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12483 unsigned StackAlign = TFI->getStackAlignment();
12484 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12485 "Unexpected stack alignment");
12486 // The default stack probe size is 4096 if the function has no
12487 // stack-probe-size attribute.
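// For example, "stack-probe-size"=5000 with a 16-byte stack alignment rounds
// down to 4992.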
12488 const Function &Fn = MF.getFunction();
12489 unsigned StackProbeSize =
12490 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12491 // Round down to the stack alignment.
12492 StackProbeSize &= ~(StackAlign - 1);
12493 return StackProbeSize ? StackProbeSize : StackAlign;
12494}
12495
12496 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12497 // into three phases. In the first phase, it uses the pseudo instruction
12498 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
12499 // and FinalStackPtr. In the second phase, it generates a loop that probes
12500 // blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
12501 // future result of MaxCallFrameSize so it can compute the correct data area pointer.
12502 MachineBasicBlock *
12503 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12504 MachineBasicBlock *MBB) const {
12505 const bool isPPC64 = Subtarget.isPPC64();
12506 MachineFunction *MF = MBB->getParent();
12507 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12508 DebugLoc DL = MI.getDebugLoc();
12509 const unsigned ProbeSize = getStackProbeSize(*MF);
12510 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12511 MachineRegisterInfo &MRI = MF->getRegInfo();
12512 // The CFG for stack probing looks like this:
12513 // +-----+
12514 // | MBB |
12515 // +--+--+
12516 // |
12517 // +----v----+
12518 // +--->+ TestMBB +---+
12519 // | +----+----+ |
12520 // | | |
12521 // | +-----v----+ |
12522 // +---+ BlockMBB | |
12523 // +----------+ |
12524 // |
12525 // +---------+ |
12526 // | TailMBB +<--+
12527 // +---------+
12528 // In MBB, calculate the previous frame pointer and the final stack pointer.
12529 // In TestMBB, test whether sp equals the final stack pointer; if so, jump to
12530 // TailMBB. In BlockMBB, update sp with a probing store and jump back to
12531 // TestMBB. TailMBB is spliced in via \p MI.
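// For example (a sketch), allocating 10000 bytes with ProbeSize = 4096 probes
// the residual 10000 % 4096 = 1808 bytes first, then the loop touches two
// full 4096-byte blocks.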
12532 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12533 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12534 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12535
12536 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12537 MF->insert(MBBIter, TestMBB);
12538 MF->insert(MBBIter, BlockMBB);
12539 MF->insert(MBBIter, TailMBB);
12540
12541 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12542 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12543
12544 Register DstReg = MI.getOperand(0).getReg();
12545 Register NegSizeReg = MI.getOperand(1).getReg();
12546 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12547 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12548 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12549 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12550
12551 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12552 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12553 // actual FramePointer and NegSize.
12554 unsigned ProbeOpc;
12555 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12556 ProbeOpc =
12557 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12558 else
12559 // By using the PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG variants,
12560 // ActualNegSizeReg and NegSizeReg will be allocated to the same physical
12561 // register, avoiding a redundant copy when NegSizeReg has only one use,
12562 // namely the current MI, which will then be replaced by PREPARE_PROBED_ALLOCA.
12563 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12564 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12565 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12566 .addDef(ActualNegSizeReg)
12567 .addReg(NegSizeReg)
12568 .add(MI.getOperand(2))
12569 .add(MI.getOperand(3));
12570
12571 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12572 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12573 FinalStackPtr)
12574 .addReg(SPReg)
12575 .addReg(ActualNegSizeReg);
12576
12577 // Materialize a scratch register for update.
12578 int64_t NegProbeSize = -(int64_t)ProbeSize;
12579 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12580 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12581 if (!isInt<16>(NegProbeSize)) {
12582 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12583 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12584 .addImm(NegProbeSize >> 16);
12585 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12586 ScratchReg)
12587 .addReg(TempReg)
12588 .addImm(NegProbeSize & 0xFFFF);
12589 } else
12590 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12591 .addImm(NegProbeSize);
12592
12593 {
12594 // Probe the leading residual part.
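// NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize is
// the (non-positive) residual; the store-with-update below both probes that
// range and advances SP by it.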
12595 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12596 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12597 .addReg(ActualNegSizeReg)
12598 .addReg(ScratchReg);
12599 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12600 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12601 .addReg(Div)
12602 .addReg(ScratchReg);
12603 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12604 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12605 .addReg(Mul)
12606 .addReg(ActualNegSizeReg);
12607 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12608 .addReg(FramePointer)
12609 .addReg(SPReg)
12610 .addReg(NegMod);
12611 }
12612
12613 {
12614 // The remaining part should be a multiple of ProbeSize.
12615 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12616 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12617 .addReg(SPReg)
12618 .addReg(FinalStackPtr);
12619 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12620 .addImm(PPC::PRED_EQ)
12621 .addReg(CmpResult)
12622 .addMBB(TailMBB);
12623 TestMBB->addSuccessor(BlockMBB);
12624 TestMBB->addSuccessor(TailMBB);
12625 }
12626
12627 {
12628 // Touch the block.
12629 // |P...|P...|P...
12630 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12631 .addReg(FramePointer)
12632 .addReg(SPReg)
12633 .addReg(ScratchReg);
12634 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12635 BlockMBB->addSuccessor(TestMBB);
12636 }
12637
12638 // Calculation of MaxCallFrameSize is deferred to the prologue/epilogue
12639 // inserter; use the DYNAREAOFFSET pseudo instruction to get the eventual result.
12640 Register MaxCallFrameSizeReg =
12641 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12642 BuildMI(TailMBB, DL,
12643 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12644 MaxCallFrameSizeReg)
12645 .add(MI.getOperand(2))
12646 .add(MI.getOperand(3));
12647 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12648 .addReg(SPReg)
12649 .addReg(MaxCallFrameSizeReg);
12650
12651 // Splice instructions after MI to TailMBB.
12652 TailMBB->splice(TailMBB->end(), MBB,
12653 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12654 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12655 MBB->addSuccessor(TestMBB);
12656
12657 // Delete the pseudo instruction.
12658 MI.eraseFromParent();
12659
12660 ++NumDynamicAllocaProbed;
12661 return TailMBB;
12662}
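
// A standalone model (hypothetical helper, not part of this file) of the
// probe sequence built above: the leading residual (NegMod) is touched first
// by a single stdux/stwux, then whole ProbeSize blocks are touched in the
// TestMBB/BlockMBB loop until SP reaches FinalStackPtr.
#include <cstdint>
#include <cstdio>
static void modelProbedAlloca(int64_t NegSize, int64_t NegProbeSize) {
  // DIVD/MULLD/SUBF compute the leading residual, as in the code above.
  int64_t NegMod = NegSize - (NegSize / NegProbeSize) * NegProbeSize;
  int64_t SP = 0;
  SP += NegMod;          // stdux SP, FramePointer, NegMod
  while (SP != NegSize)  // TestMBB: cmpd SP, FinalStackPtr; beq TailMBB
    SP += NegProbeSize;  // BlockMBB: stdux SP, FramePointer, NegProbeSize
  std::printf("final SP offset: %lld\n", (long long)SP);
}
int main() { modelProbedAlloca(-10000, -4096); } // probes at -1808, -5904, -10000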
12663
12664MachineBasicBlock *
12665PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12666 MachineBasicBlock *BB) const {
12667 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12668 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12669 if (Subtarget.is64BitELFABI() &&
12670 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12671 !Subtarget.isUsingPCRelativeCalls()) {
12672 // Call lowering should have added an r2 operand to indicate a dependence
12673 // on the TOC base pointer value. It can't, however, because there is no
12674 // way to mark the dependence as implicit there, and so the stackmap code
12675 // will confuse it with a regular operand. Instead, add the dependence
12676 // here.
12677 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12678 }
12679
12680 return emitPatchPoint(MI, BB);
12681 }
12682
12683 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12684 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12685 return emitEHSjLjSetJmp(MI, BB);
12686 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12687 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12688 return emitEHSjLjLongJmp(MI, BB);
12689 }
12690
12691 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12692
12693 // To "insert" these instructions we actually have to insert their
12694 // control-flow patterns.
12695 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12696 MachineFunction::iterator It = ++BB->getIterator();
12697
12698 MachineFunction *F = BB->getParent();
12699 MachineRegisterInfo &MRI = F->getRegInfo();
12700
12701 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12702 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12703 MI.getOpcode() == PPC::SELECT_I8) {
12704 SmallVector<MachineOperand, 2> Cond;
12705 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12706 MI.getOpcode() == PPC::SELECT_CC_I8)
12707 Cond.push_back(MI.getOperand(4));
12708 else
12709 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12710 Cond.push_back(MI.getOperand(1));
12711
12712 DebugLoc dl = MI.getDebugLoc();
12713 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12714 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12715 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12716 MI.getOpcode() == PPC::SELECT_CC_F8 ||
12717 MI.getOpcode() == PPC::SELECT_CC_F16 ||
12718 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12719 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12720 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12721 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12722 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12723 MI.getOpcode() == PPC::SELECT_CC_SPE ||
12724 MI.getOpcode() == PPC::SELECT_F4 ||
12725 MI.getOpcode() == PPC::SELECT_F8 ||
12726 MI.getOpcode() == PPC::SELECT_F16 ||
12727 MI.getOpcode() == PPC::SELECT_SPE ||
12728 MI.getOpcode() == PPC::SELECT_SPE4 ||
12729 MI.getOpcode() == PPC::SELECT_VRRC ||
12730 MI.getOpcode() == PPC::SELECT_VSFRC ||
12731 MI.getOpcode() == PPC::SELECT_VSSRC ||
12732 MI.getOpcode() == PPC::SELECT_VSRC) {
12733 // The incoming instruction knows the destination vreg to set, the
12734 // condition code register to branch on, the true/false values to
12735 // select between, and a branch opcode to use.
12736
12737 // thisMBB:
12738 // ...
12739 // TrueVal = ...
12740 // cmpTY ccX, r1, r2
12741 // bCC copy1MBB
12742 // fallthrough --> copy0MBB
12743 MachineBasicBlock *thisMBB = BB;
12744 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12745 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12746 DebugLoc dl = MI.getDebugLoc();
12747 F->insert(It, copy0MBB);
12748 F->insert(It, sinkMBB);
12749
12750 // Transfer the remainder of BB and its successor edges to sinkMBB.
12751 sinkMBB->splice(sinkMBB->begin(), BB,
12752 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12753 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12754
12755 // Next, add the true and fallthrough blocks as its successors.
12756 BB->addSuccessor(copy0MBB);
12757 BB->addSuccessor(sinkMBB);
12758
12759 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12760 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12761 MI.getOpcode() == PPC::SELECT_F16 ||
12762 MI.getOpcode() == PPC::SELECT_SPE4 ||
12763 MI.getOpcode() == PPC::SELECT_SPE ||
12764 MI.getOpcode() == PPC::SELECT_VRRC ||
12765 MI.getOpcode() == PPC::SELECT_VSFRC ||
12766 MI.getOpcode() == PPC::SELECT_VSSRC ||
12767 MI.getOpcode() == PPC::SELECT_VSRC) {
12768 BuildMI(BB, dl, TII->get(PPC::BC))
12769 .addReg(MI.getOperand(1).getReg())
12770 .addMBB(sinkMBB);
12771 } else {
12772 unsigned SelectPred = MI.getOperand(4).getImm();
12773 BuildMI(BB, dl, TII->get(PPC::BCC))
12774 .addImm(SelectPred)
12775 .addReg(MI.getOperand(1).getReg())
12776 .addMBB(sinkMBB);
12777 }
12778
12779 // copy0MBB:
12780 // %FalseValue = ...
12781 // # fallthrough to sinkMBB
12782 BB = copy0MBB;
12783
12784 // Update machine-CFG edges
12785 BB->addSuccessor(sinkMBB);
12786
12787 // sinkMBB:
12788 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12789 // ...
12790 BB = sinkMBB;
12791 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12792 .addReg(MI.getOperand(3).getReg())
12793 .addMBB(copy0MBB)
12794 .addReg(MI.getOperand(2).getReg())
12795 .addMBB(thisMBB);
12796 } else if (MI.getOpcode() == PPC::ReadTB) {
12797 // To read the 64-bit time-base register on a 32-bit target, we read the
12798 // two halves. Should the counter have wrapped while it was being read, we
12799 // need to try again.
12800 // ...
12801 // readLoop:
12802 // mfspr Rx,TBU # load from TBU
12803 // mfspr Ry,TB # load from TB
12804 // mfspr Rz,TBU # load from TBU
12805 // cmpw crX,Rx,Rz # check if 'old'='new'
12806 // bne readLoop # branch if they're not equal
12807 // ...
12808
12809 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12810 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12811 DebugLoc dl = MI.getDebugLoc();
12812 F->insert(It, readMBB);
12813 F->insert(It, sinkMBB);
12814
12815 // Transfer the remainder of BB and its successor edges to sinkMBB.
12816 sinkMBB->splice(sinkMBB->begin(), BB,
12817 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12818 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12819
12820 BB->addSuccessor(readMBB);
12821 BB = readMBB;
12822
12823 MachineRegisterInfo &RegInfo = F->getRegInfo();
12824 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12825 Register LoReg = MI.getOperand(0).getReg();
12826 Register HiReg = MI.getOperand(1).getReg();
12827
12828 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12829 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12830 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12831
12832 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12833
12834 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12835 .addReg(HiReg)
12836 .addReg(ReadAgainReg);
12837 BuildMI(BB, dl, TII->get(PPC::BCC))
12838 .addImm(PPC::PRED_NE)
12839 .addReg(CmpReg)
12840 .addMBB(readMBB);
12841
12842 BB->addSuccessor(readMBB);
12843 BB->addSuccessor(sinkMBB);
12844 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12845 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12846 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12847 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12848 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12849 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12850 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12851 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12852
12853 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12854 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12855 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12856 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12857 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12858 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12859 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12860 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12861
12862 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12863 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12864 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12865 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12866 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12867 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12868 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12869 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12870
12871 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12872 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12873 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12874 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12875 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12876 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12877 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12878 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12879
12880 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12881 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12882 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12883 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12884 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12885 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12886 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12887 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12888
12889 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12890 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12891 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12892 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12893 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12894 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12895 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12896 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12897
12898 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12899 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
12900 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12901 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
12902 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12903 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
12904 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12905 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
12906
12907 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12908 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
12909 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12910 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
12911 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12912 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
12913 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12914 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
12915
12916 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12917 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
12918 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12919 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
12920 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12921 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
12922 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12923 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
12924
12925 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12926 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
12927 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12928 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
12929 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12930 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
12931 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12932 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
12933
12934 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12935 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12936 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12937 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12938 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12939 BB = EmitAtomicBinary(MI, BB, 4, 0);
12940 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12941 BB = EmitAtomicBinary(MI, BB, 8, 0);
12942 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12943 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12944 (Subtarget.hasPartwordAtomics() &&
12945 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12946 (Subtarget.hasPartwordAtomics() &&
12947 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12948 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12949
12950 auto LoadMnemonic = PPC::LDARX;
12951 auto StoreMnemonic = PPC::STDCX;
12952 switch (MI.getOpcode()) {
12953 default:
12954 llvm_unreachable("Compare and swap of unknown size");
12955 case PPC::ATOMIC_CMP_SWAP_I8:
12956 LoadMnemonic = PPC::LBARX;
12957 StoreMnemonic = PPC::STBCX;
12958 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12959 break;
12960 case PPC::ATOMIC_CMP_SWAP_I16:
12961 LoadMnemonic = PPC::LHARX;
12962 StoreMnemonic = PPC::STHCX;
12963 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12964 break;
12965 case PPC::ATOMIC_CMP_SWAP_I32:
12966 LoadMnemonic = PPC::LWARX;
12967 StoreMnemonic = PPC::STWCX;
12968 break;
12969 case PPC::ATOMIC_CMP_SWAP_I64:
12970 LoadMnemonic = PPC::LDARX;
12971 StoreMnemonic = PPC::STDCX;
12972 break;
12973 }
12974 MachineRegisterInfo &RegInfo = F->getRegInfo();
12975 Register dest = MI.getOperand(0).getReg();
12976 Register ptrA = MI.getOperand(1).getReg();
12977 Register ptrB = MI.getOperand(2).getReg();
12978 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12979 Register oldval = MI.getOperand(3).getReg();
12980 Register newval = MI.getOperand(4).getReg();
12981 DebugLoc dl = MI.getDebugLoc();
12982
12983 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12984 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12985 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12986 F->insert(It, loop1MBB);
12987 F->insert(It, loop2MBB);
12988 F->insert(It, exitMBB);
12989 exitMBB->splice(exitMBB->begin(), BB,
12990 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12991 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12992
12993 // thisMBB:
12994 // ...
12995 // fallthrough --> loopMBB
12996 BB->addSuccessor(loop1MBB);
12997
12998 // loop1MBB:
12999 // l[bhwd]arx dest, ptr
13000 // cmp[wd] dest, oldval
13001 // bne- exitBB
13002 // loop2MBB:
13003 // st[bhwd]cx. newval, ptr
13004 // bne- loopMBB
13005 // b exitBB
13006 // exitBB:
13007 BB = loop1MBB;
13008 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13009 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13010 .addReg(dest)
13011 .addReg(oldval);
13012 BuildMI(BB, dl, TII->get(PPC::BCC))
13013 .addImm(PPC::PRED_NE)
13014 .addReg(CrReg)
13015 .addMBB(exitMBB);
13016 BB->addSuccessor(loop2MBB);
13017 BB->addSuccessor(exitMBB);
13018
13019 BB = loop2MBB;
13020 BuildMI(BB, dl, TII->get(StoreMnemonic))
13021 .addReg(newval)
13022 .addReg(ptrA)
13023 .addReg(ptrB);
13024 BuildMI(BB, dl, TII->get(PPC::BCC))
13025 .addImm(PPC::PRED_NE)
13026 .addReg(PPC::CR0)
13027 .addMBB(loop1MBB);
13028 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13029 BB->addSuccessor(loop1MBB);
13030 BB->addSuccessor(exitMBB);
13031
13032 // exitMBB:
13033 // ...
13034 BB = exitMBB;
13035 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13036 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13037 // We must use 64-bit registers for addresses when targeting 64-bit,
13038 // since we're actually doing arithmetic on them. Other registers
13039 // can be 32-bit.
13040 bool is64bit = Subtarget.isPPC64();
13041 bool isLittleEndian = Subtarget.isLittleEndian();
13042 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13043
13044 Register dest = MI.getOperand(0).getReg();
13045 Register ptrA = MI.getOperand(1).getReg();
13046 Register ptrB = MI.getOperand(2).getReg();
13047 Register oldval = MI.getOperand(3).getReg();
13048 Register newval = MI.getOperand(4).getReg();
13049 DebugLoc dl = MI.getDebugLoc();
13050
13051 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13052 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13053 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13054 F->insert(It, loop1MBB);
13055 F->insert(It, loop2MBB);
13056 F->insert(It, exitMBB);
13057 exitMBB->splice(exitMBB->begin(), BB,
13058 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13059 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13060
13061 MachineRegisterInfo &RegInfo = F->getRegInfo();
13062 const TargetRegisterClass *RC =
13063 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13064 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13065
13066 Register PtrReg = RegInfo.createVirtualRegister(RC);
13067 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13068 Register ShiftReg =
13069 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13070 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13071 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13072 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13073 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13074 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13075 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13076 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13077 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13078 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13079 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13080 Register Ptr1Reg;
13081 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13082 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13083 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13084 // thisMBB:
13085 // ...
13086 // fallthrough --> loopMBB
13087 BB->addSuccessor(loop1MBB);
13088
13089 // The 4-byte load must be aligned, while a char or short may be
13090 // anywhere in the word. Hence all this nasty bookkeeping code.
13091 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13092 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13093 // xori shift, shift1, 24 [16]
13094 // rlwinm ptr, ptr1, 0, 0, 29
13095 // slw newval2, newval, shift
13096 // slw oldval2, oldval,shift
13097 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13098 // slw mask, mask2, shift
13099 // and newval3, newval2, mask
13100 // and oldval3, oldval2, mask
13101 // loop1MBB:
13102 // lwarx tmpDest, ptr
13103 // and tmp, tmpDest, mask
13104 // cmpw tmp, oldval3
13105 // bne- exitBB
13106 // loop2MBB:
13107 // andc tmp2, tmpDest, mask
13108 // or tmp4, tmp2, newval3
13109 // stwcx. tmp4, ptr
13110 // bne- loop1MBB
13111 // b exitBB
13112 // exitBB:
13113 // srw dest, tmpDest, shift
13114 if (ptrA != ZeroReg) {
13115 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13116 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13117 .addReg(ptrA)
13118 .addReg(ptrB);
13119 } else {
13120 Ptr1Reg = ptrB;
13121 }
13122
13123 // We need to use a 32-bit subregister here to avoid a register class
13124 // mismatch in 64-bit mode.
13125 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13126 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13127 .addImm(3)
13128 .addImm(27)
13129 .addImm(is8bit ? 28 : 27);
13130 if (!isLittleEndian)
13131 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13132 .addReg(Shift1Reg)
13133 .addImm(is8bit ? 24 : 16);
13134 if (is64bit)
13135 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13136 .addReg(Ptr1Reg)
13137 .addImm(0)
13138 .addImm(61);
13139 else
13140 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13141 .addReg(Ptr1Reg)
13142 .addImm(0)
13143 .addImm(0)
13144 .addImm(29);
13145 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13146 .addReg(newval)
13147 .addReg(ShiftReg);
13148 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13149 .addReg(oldval)
13150 .addReg(ShiftReg);
13151 if (is8bit)
13152 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13153 else {
13154 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13155 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13156 .addReg(Mask3Reg)
13157 .addImm(65535);
13158 }
13159 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13160 .addReg(Mask2Reg)
13161 .addReg(ShiftReg);
13162 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13163 .addReg(NewVal2Reg)
13164 .addReg(MaskReg);
13165 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13166 .addReg(OldVal2Reg)
13167 .addReg(MaskReg);
13168
13169 BB = loop1MBB;
13170 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13171 .addReg(ZeroReg)
13172 .addReg(PtrReg);
13173 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13174 .addReg(TmpDestReg)
13175 .addReg(MaskReg);
13176 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13177 .addReg(TmpReg)
13178 .addReg(OldVal3Reg);
13179 BuildMI(BB, dl, TII->get(PPC::BCC))
13180 .addImm(PPC::PRED_NE)
13181 .addReg(CrReg)
13182 .addMBB(exitMBB);
13183 BB->addSuccessor(loop2MBB);
13184 BB->addSuccessor(exitMBB);
13185
13186 BB = loop2MBB;
13187 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13188 .addReg(TmpDestReg)
13189 .addReg(MaskReg);
13190 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13191 .addReg(Tmp2Reg)
13192 .addReg(NewVal3Reg);
13193 BuildMI(BB, dl, TII->get(PPC::STWCX))
13194 .addReg(Tmp4Reg)
13195 .addReg(ZeroReg)
13196 .addReg(PtrReg);
13197 BuildMI(BB, dl, TII->get(PPC::BCC))
13198 .addImm(PPC::PRED_NE)
13199 .addReg(PPC::CR0)
13200 .addMBB(loop1MBB);
13201 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13202 BB->addSuccessor(loop1MBB);
13203 BB->addSuccessor(exitMBB);
13204
13205 // exitMBB:
13206 // ...
13207 BB = exitMBB;
13208 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13209 .addReg(TmpReg)
13210 .addReg(ShiftReg);
13211 } else if (MI.getOpcode() == PPC::FADDrtz) {
13212 // This pseudo performs an FADD with rounding mode temporarily forced
13213 // to round-to-zero. We emit this via custom inserter since the FPSCR
13214 // is not modeled at the SelectionDAG level.
13215 Register Dest = MI.getOperand(0).getReg();
13216 Register Src1 = MI.getOperand(1).getReg();
13217 Register Src2 = MI.getOperand(2).getReg();
13218 DebugLoc dl = MI.getDebugLoc();
13219
13220 MachineRegisterInfo &RegInfo = F->getRegInfo();
13221 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13222
13223 // Save FPSCR value.
13224 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13225
13226 // Set rounding mode to round-to-zero.
13227 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13228 .addImm(31)
13229 .addReg(PPC::RM, RegState::ImplicitDefine);
13230
13231 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13232 .addImm(30)
13233 .addReg(PPC::RM, RegState::ImplicitDefine);
13234
13235 // Perform addition.
13236 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13237 .addReg(Src1)
13238 .addReg(Src2);
13239 if (MI.getFlag(MachineInstr::NoFPExcept))
13240 MIB.setMIFlag(MachineInstr::NoFPExcept);
13241
13242 // Restore FPSCR value.
13243 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13244 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13245 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13246 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13247 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13248 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13249 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13250 ? PPC::ANDI8_rec
13251 : PPC::ANDI_rec;
13252 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13253 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13254
13255 MachineRegisterInfo &RegInfo = F->getRegInfo();
13256 Register Dest = RegInfo.createVirtualRegister(
13257 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13258
13259 DebugLoc Dl = MI.getDebugLoc();
13260 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13261 .addReg(MI.getOperand(1).getReg())
13262 .addImm(1);
13263 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13264 MI.getOperand(0).getReg())
13265 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13266 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13267 DebugLoc Dl = MI.getDebugLoc();
13268 MachineRegisterInfo &RegInfo = F->getRegInfo();
13269 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13270 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13271 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13272 MI.getOperand(0).getReg())
13273 .addReg(CRReg);
13274 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13275 DebugLoc Dl = MI.getDebugLoc();
13276 unsigned Imm = MI.getOperand(1).getImm();
13277 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13278 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13279 MI.getOperand(0).getReg())
13280 .addReg(PPC::CR0EQ);
13281 } else if (MI.getOpcode() == PPC::SETRNDi) {
13282 DebugLoc dl = MI.getDebugLoc();
13283 Register OldFPSCRReg = MI.getOperand(0).getReg();
13284
13285 // Save FPSCR value.
13286 if (MRI.use_empty(OldFPSCRReg))
13287 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13288 else
13289 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13290
13291 // The floating-point rounding mode is in bits 62:63 of FPSCR, and has
13292 // the following settings:
13293 // 00 Round to nearest
13294 // 01 Round to 0
13295 // 10 Round to +inf
13296 // 11 Round to -inf
13297
13298 // When the operand is an immediate, use its two least significant bits to
13299 // set bits 62:63 of FPSCR.
13300 unsigned Mode = MI.getOperand(1).getImm();
13301 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13302 .addImm(31)
13303 .addReg(PPC::RM, RegState::ImplicitDefine);
13304
13305 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13306 .addImm(30)
13307 .addReg(PPC::RM, RegState::ImplicitDefine);
13308 } else if (MI.getOpcode() == PPC::SETRND) {
13309 DebugLoc dl = MI.getDebugLoc();
13310
13311 // Copy a register from F8RCRegClass (SrcReg) to G8RCRegClass (DestReg),
13312 // or from G8RCRegClass (SrcReg) to F8RCRegClass (DestReg).
13313 // If the target doesn't have DirectMove, we go through the stack to do the
13314 // conversion, because instructions like mtvsrd and mfvsrd that move
13315 // directly between the register files are unavailable.
13316 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13317 if (Subtarget.hasDirectMove()) {
13318 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13319 .addReg(SrcReg);
13320 } else {
13321 // Use stack to do the register copy.
13322 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13323 MachineRegisterInfo &RegInfo = F->getRegInfo();
13324 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13325 if (RC == &PPC::F8RCRegClass) {
13326 // Copy the register from F8RCRegClass to G8RCRegClass.
13327 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13328 "Unsupported RegClass.");
13329
13330 StoreOp = PPC::STFD;
13331 LoadOp = PPC::LD;
13332 } else {
13333 // Copy the register from G8RCRegClass to F8RCRegClass.
13334 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13335 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13336 "Unsupported RegClass.");
13337 }
13338
13339 MachineFrameInfo &MFI = F->getFrameInfo();
13340 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13341
13342 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13343 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13344 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13345 MFI.getObjectAlign(FrameIdx));
13346
13347 // Store the SrcReg into the stack.
13348 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13349 .addReg(SrcReg)
13350 .addImm(0)
13351 .addFrameIndex(FrameIdx)
13352 .addMemOperand(MMOStore);
13353
13354 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13355 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13356 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13357 MFI.getObjectAlign(FrameIdx));
13358
13359 // Load DestReg from the stack slot where SrcReg was just stored, so we
13360 // have completed the register class conversion from the class of SrcReg
13361 // to the class of DestReg.
13362 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13363 .addImm(0)
13364 .addFrameIndex(FrameIdx)
13365 .addMemOperand(MMOLoad);
13366 }
13367 };
13368
13369 Register OldFPSCRReg = MI.getOperand(0).getReg();
13370
13371 // Save FPSCR value.
13372 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13373
13374 // When the operand is a GPR, use its two least significant bits together
13375 // with the mtfsf instruction to set bits 62:63 of FPSCR.
13376 //
13377 // copy OldFPSCRTmpReg, OldFPSCRReg
13378 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13379 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13380 // copy NewFPSCRReg, NewFPSCRTmpReg
13381 // mtfsf 255, NewFPSCRReg
13382 MachineOperand SrcOp = MI.getOperand(1);
13383 MachineRegisterInfo &RegInfo = F->getRegInfo();
13384 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13385
13386 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13387
13388 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13389 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13390
13391 // The first operand of INSERT_SUBREG should be a register that has
13392 // subregisters; since we only care about its register class, an
13393 // IMPLICIT_DEF register suffices.
13394 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13395 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13396 .addReg(ImDefReg)
13397 .add(SrcOp)
13398 .addImm(1);
13399
13400 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13401 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13402 .addReg(OldFPSCRTmpReg)
13403 .addReg(ExtSrcReg)
13404 .addImm(0)
13405 .addImm(62);
13406
13407 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13408 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13409
13410 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
13411 // 32:63 of FPSCR.
13412 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13413 .addImm(255)
13414 .addReg(NewFPSCRReg)
13415 .addImm(0)
13416 .addImm(0);
13417 } else if (MI.getOpcode() == PPC::SETFLM) {
13418 DebugLoc Dl = MI.getDebugLoc();
13419
13420 // Result of setflm is previous FPSCR content, so we need to save it first.
13421 Register OldFPSCRReg = MI.getOperand(0).getReg();
13422 if (MRI.use_empty(OldFPSCRReg))
13423 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13424 else
13425 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13426
13427 // Put bits 32:63 of NewFPSCRReg into FPSCR.
13428 Register NewFPSCRReg = MI.getOperand(1).getReg();
13429 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13430 .addImm(255)
13431 .addReg(NewFPSCRReg)
13432 .addImm(0)
13433 .addImm(0);
13434 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13435 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13436 return emitProbedAlloca(MI, BB);
13437 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13438 DebugLoc DL = MI.getDebugLoc();
13439 Register Src = MI.getOperand(2).getReg();
13440 Register Lo = MI.getOperand(0).getReg();
13441 Register Hi = MI.getOperand(1).getReg();
13442 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13443 .addDef(Lo)
13444 .addUse(Src, 0, PPC::sub_gp8_x1);
13445 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13446 .addDef(Hi)
13447 .addUse(Src, 0, PPC::sub_gp8_x0);
13448 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13449 MI.getOpcode() == PPC::STQX_PSEUDO) {
13450 DebugLoc DL = MI.getDebugLoc();
13451 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
13452 // holds the result of adding RA and RB, so it has to be
13453 // g8rc_and_g8rc_nox0.
13454 Register Ptr =
13455 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13456 Register Val = MI.getOperand(0).getReg();
13457 Register RA = MI.getOperand(1).getReg();
13458 Register RB = MI.getOperand(2).getReg();
13459 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13460 BuildMI(*BB, MI, DL,
13461 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13462 : TII->get(PPC::STQ))
13463 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13464 .addImm(0)
13465 .addReg(Ptr);
13466 } else {
13467 llvm_unreachable("Unexpected instr type to insert");
13468 }
13469
13470 MI.eraseFromParent(); // The pseudo instruction is gone now.
13471 return BB;
13472}
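
// For reference, the ldarx/stdcx. loop emitted for ATOMIC_CMP_SWAP above
// implements the usual compare-and-swap contract; a standalone C++ sketch
// (hypothetical helper, not part of this file):
#include <atomic>
#include <cstdint>
static uint64_t modelCmpSwap(std::atomic<uint64_t> &Mem, uint64_t OldVal,
                             uint64_t NewVal) {
  uint64_t Loaded = OldVal;
  // On PPC this compiles to the same shape as the blocks built above:
  // ldarx; cmpd; bne- exit; stdcx.; bne- retry.
  Mem.compare_exchange_strong(Loaded, NewVal);
  return Loaded; // like 'dest': the value observed by ldarx
}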
13473
13474//===----------------------------------------------------------------------===//
13475// Target Optimization Hooks
13476//===----------------------------------------------------------------------===//
13477
13478static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13479 // For the estimates, convergence is quadratic, so we essentially double the
13480 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13481 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13482 // this is 2^-14. IEEE float has 23 fraction bits and double has 52.
13483 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13484 if (VT.getScalarType() == MVT::f64)
13485 RefinementSteps++;
13486 return RefinementSteps;
13487}
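
// A worked check of the doubling argument above (standalone sketch): each
// Newton-Raphson step squares the relative error, i.e. doubles the number of
// correct bits.
constexpr int stepsNeeded(int AccurateBits, int NeededBits) {
  int Steps = 0;
  while (AccurateBits < NeededBits) {
    AccurateBits *= 2; // quadratic convergence
    ++Steps;
  }
  return Steps;
}
// 2^-5 start: 5 -> 10 -> 20 -> 40 bits covers f32; one more step covers f64.
static_assert(stepsNeeded(5, 24) == 3 && stepsNeeded(5, 53) == 4, "");
// 2^-14 start (hasRecipPrec): one step for f32, two for f64.
static_assert(stepsNeeded(14, 24) == 1 && stepsNeeded(14, 53) == 2, "");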
13488
13489SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13490 const DenormalMode &Mode) const {
13491 // We only have VSX Vector Test for software Square Root.
13492 EVT VT = Op.getValueType();
13493 if (!isTypeLegal(MVT::i1) ||
13494 (VT != MVT::f64 &&
13495 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13496 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13497
13498 SDLoc DL(Op);
13499 // The output of FTSQRT is a CR field.
13500 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13501 // ftsqrt BF,FRB
13502 // Let e_b be the unbiased exponent of the double-precision
13503 // floating-point operand in register FRB.
13504 // fe_flag is set to 1 if either of the following conditions occurs.
13505 // - The double-precision floating-point operand in register FRB is a zero,
13506 // a NaN, an infinity, or a negative value.
13507 // - e_b is less than or equal to -970.
13508 // Otherwise fe_flag is set to 0.
13509 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13510 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13511 // exponent is less than -970)
13512 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13513 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13514 FTSQRT, SRIdxVal),
13515 0);
13516}
13517
13518SDValue
13519PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13520 SelectionDAG &DAG) const {
13521 // We only have VSX Vector Square Root.
13522 EVT VT = Op.getValueType();
13523 if (VT != MVT::f64 &&
13524 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13525 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13526
13527 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13528}
13529
13530SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13531 int Enabled, int &RefinementSteps,
13532 bool &UseOneConstNR,
13533 bool Reciprocal) const {
13534 EVT VT = Operand.getValueType();
13535 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13536 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13537 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13538 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13539 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13540 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13541
13542 // The Newton-Raphson computation with a single constant does not provide
13543 // enough accuracy on some CPUs.
13544 UseOneConstNR = !Subtarget.needsTwoConstNR();
13545 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13546 }
13547 return SDValue();
13548}
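
// A minimal sketch of the refinement that the DAG combiner wraps around the
// FRSQRTE estimate returned here (standalone, hypothetical names): each
// Newton-Raphson step computes x' = x * (1.5 - 0.5 * a * x * x).
#include <cmath>
#include <cstdio>
static double refineRsqrt(double A, double X) {
  return X * (1.5 - 0.5 * A * X * X);
}
int main() {
  double A = 2.0, X = 0.7; // crude initial estimate of 1/sqrt(2)
  for (int I = 0; I < 2; ++I)
    X = refineRsqrt(A, X);
  std::printf("%.17g vs %.17g\n", X, 1.0 / std::sqrt(2.0));
}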
13549
13550SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13551 int Enabled,
13552 int &RefinementSteps) const {
13553 EVT VT = Operand.getValueType();
13554 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13555 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13556 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13557 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13558 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13559 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13560 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13561 }
13562 return SDValue();
13563}
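
// The analogous sketch for the FRE path (standalone): a reciprocal estimate
// is refined with x' = x * (2 - a * x) per step.
static double refineRecip(double A, double X) { return X * (2.0 - A * X); }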
13564
13565unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13566 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13567 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13568 // enabled for division), this functionality is redundant with the default
13569 // combiner logic (once the division -> reciprocal/multiply transformation
13570 // has taken place). As a result, this matters more for older cores than for
13571 // newer ones.
13572
13573 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13574 // reciprocal if there are two or more FDIVs (for embedded cores with only
13575 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
13576 switch (Subtarget.getCPUDirective()) {
13577 default:
13578 return 3;
13579 case PPC::DIR_440:
13580 case PPC::DIR_A2:
13581 case PPC::DIR_E500:
13582 case PPC::DIR_E500mc:
13583 case PPC::DIR_E5500:
13584 return 2;
13585 }
13586}
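
// A source-level illustration of the transformation this threshold gates
// (standalone sketch): under unsafe-fp-math, repeated divisions by one
// divisor become a single reciprocal plus multiplies.
static void divideAll(double V[3], double D) {
  // Before the combine: V[0] / D, V[1] / D, V[2] / D (three fdivs).
  double Rcp = 1.0 / D; // one division
  V[0] *= Rcp;
  V[1] *= Rcp;
  V[2] *= Rcp;
}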
13587
13588// isConsecutiveLSLoc needs to work even if all adds have not yet been
13589// collapsed, and so we need to look through chains of them.
13590static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13591 int64_t& Offset, SelectionDAG &DAG) {
13592 if (DAG.isBaseWithConstantOffset(Loc)) {
13593 Base = Loc.getOperand(0);
13594 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13595
13596 // The base might itself be a base plus an offset, and if so, accumulate
13597 // that as well.
13598 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13599 }
13600}
13601
13602static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13603 unsigned Bytes, int Dist,
13604 SelectionDAG &DAG) {
13605 if (VT.getSizeInBits() / 8 != Bytes)
13606 return false;
13607
13608 SDValue BaseLoc = Base->getBasePtr();
13609 if (Loc.getOpcode() == ISD::FrameIndex) {
13610 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13611 return false;
13612 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13613 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13614 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13615 int FS = MFI.getObjectSize(FI);
13616 int BFS = MFI.getObjectSize(BFI);
13617 if (FS != BFS || FS != (int)Bytes) return false;
13618 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13619 }
13620
13621 SDValue Base1 = Loc, Base2 = BaseLoc;
13622 int64_t Offset1 = 0, Offset2 = 0;
13623 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13624 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13625 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13626 return true;
13627
13628 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13629 const GlobalValue *GV1 = nullptr;
13630 const GlobalValue *GV2 = nullptr;
13631 Offset1 = 0;
13632 Offset2 = 0;
13633 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13634 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13635 if (isGA1 && isGA2 && GV1 == GV2)
13636 return Offset1 == (Offset2 + Dist*Bytes);
13637 return false;
13638}
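
// The offset arithmetic above, distilled (standalone sketch): two accesses of
// Bytes bytes with a common base are consecutive at distance Dist iff
// Offset1 == Offset2 + Dist * Bytes.
#include <cstdint>
static bool offsetsConsecutive(int64_t Offset1, int64_t Offset2,
                               unsigned Bytes, int Dist) {
  return Offset1 == Offset2 + (int64_t)Dist * (int64_t)Bytes;
}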
13639
13640// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13641// not enforce equality of the chain operands.
13642static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13643 unsigned Bytes, int Dist,
13644 SelectionDAG &DAG) {
13645 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13646 EVT VT = LS->getMemoryVT();
13647 SDValue Loc = LS->getBasePtr();
13648 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13649 }
13650
13651 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13652 EVT VT;
13653 switch (N->getConstantOperandVal(1)) {
13654 default: return false;
13655 case Intrinsic::ppc_altivec_lvx:
13656 case Intrinsic::ppc_altivec_lvxl:
13657 case Intrinsic::ppc_vsx_lxvw4x:
13658 case Intrinsic::ppc_vsx_lxvw4x_be:
13659 VT = MVT::v4i32;
13660 break;
13661 case Intrinsic::ppc_vsx_lxvd2x:
13662 case Intrinsic::ppc_vsx_lxvd2x_be:
13663 VT = MVT::v2f64;
13664 break;
13665 case Intrinsic::ppc_altivec_lvebx:
13666 VT = MVT::i8;
13667 break;
13668 case Intrinsic::ppc_altivec_lvehx:
13669 VT = MVT::i16;
13670 break;
13671 case Intrinsic::ppc_altivec_lvewx:
13672 VT = MVT::i32;
13673 break;
13674 }
13675
13676 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13677 }
13678
13679 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13680 EVT VT;
13681 switch (N->getConstantOperandVal(1)) {
13682 default: return false;
13683 case Intrinsic::ppc_altivec_stvx:
13684 case Intrinsic::ppc_altivec_stvxl:
13685 case Intrinsic::ppc_vsx_stxvw4x:
13686 VT = MVT::v4i32;
13687 break;
13688 case Intrinsic::ppc_vsx_stxvd2x:
13689 VT = MVT::v2f64;
13690 break;
13691 case Intrinsic::ppc_vsx_stxvw4x_be:
13692 VT = MVT::v4i32;
13693 break;
13694 case Intrinsic::ppc_vsx_stxvd2x_be:
13695 VT = MVT::v2f64;
13696 break;
13697 case Intrinsic::ppc_altivec_stvebx:
13698 VT = MVT::i8;
13699 break;
13700 case Intrinsic::ppc_altivec_stvehx:
13701 VT = MVT::i16;
13702 break;
13703 case Intrinsic::ppc_altivec_stvewx:
13704 VT = MVT::i32;
13705 break;
13706 }
13707
13708 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13709 }
13710
13711 return false;
13712}
13713
13714 // Return true if there is a nearby consecutive load to the one provided
13715 // (regardless of alignment). We search up and down the chain, looking through
13716 // token factors and other loads (but nothing else). As a result, a true result
13717// indicates that it is safe to create a new consecutive load adjacent to the
13718// load provided.
13719static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13720 SDValue Chain = LD->getChain();
13721 EVT VT = LD->getMemoryVT();
13722
13723 SmallSet<SDNode *, 16> LoadRoots;
13724 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13725 SmallSet<SDNode *, 16> Visited;
13726
13727 // First, search up the chain, branching to follow all token-factor operands.
13728 // If we find a consecutive load, then we're done, otherwise, record all
13729 // nodes just above the top-level loads and token factors.
13730 while (!Queue.empty()) {
13731 SDNode *ChainNext = Queue.pop_back_val();
13732 if (!Visited.insert(ChainNext).second)
13733 continue;
13734
13735 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13736 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13737 return true;
13738
13739 if (!Visited.count(ChainLD->getChain().getNode()))
13740 Queue.push_back(ChainLD->getChain().getNode());
13741 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13742 for (const SDUse &O : ChainNext->ops())
13743 if (!Visited.count(O.getNode()))
13744 Queue.push_back(O.getNode());
13745 } else
13746 LoadRoots.insert(ChainNext);
13747 }
13748
13749 // Second, search down the chain, starting from the top-level nodes recorded
13750 // in the first phase. These top-level nodes are the nodes just above all
13751 // loads and token factors. Starting with their uses, recursively look through
13752 // all loads (just the chain uses) and token factors to find a consecutive
13753 // load.
13754 Visited.clear();
13755 Queue.clear();
13756
13757 for (SDNode *I : LoadRoots) {
13758 Queue.push_back(I);
13759
13760 while (!Queue.empty()) {
13761 SDNode *LoadRoot = Queue.pop_back_val();
13762 if (!Visited.insert(LoadRoot).second)
13763 continue;
13764
13765 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13766 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13767 return true;
13768
13769 for (SDNode *U : LoadRoot->uses())
13770 if (((isa<MemSDNode>(U) &&
13771 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13772 U->getOpcode() == ISD::TokenFactor) &&
13773 !Visited.count(U))
13774 Queue.push_back(U);
13775 }
13776 }
13777
13778 return false;
13779}
13780
13781/// This function is called when we have proved that a SETCC node can be replaced
13782/// by subtraction (and other supporting instructions) so that the result of
13783 /// the comparison is kept in a GPR instead of a CR. This function is purely for
13784/// codegen purposes and has some flags to guide the codegen process.
13785static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13786 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13787 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13788
13789 // Zero extend the operands to the largest legal integer. Originally, they
13790 // must be of a strictly smaller size.
13791 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13792 DAG.getConstant(Size, DL, MVT::i32));
13793 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13794 DAG.getConstant(Size, DL, MVT::i32));
13795
13796 // Swap the operands if needed, depending on the condition code.
13797 if (Swap)
13798 std::swap(Op0, Op1);
13799
13800 // Subtract extended integers.
13801 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13802
13803 // Move the sign bit to the least significant position and zero out the rest.
13804 // Now the least significant bit carries the result of the original comparison.
13805 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13806 DAG.getConstant(Size - 1, DL, MVT::i32));
13807 auto Final = Shifted;
13808
13809 // Complement the result if needed, based on the condition code.
13810 if (Complement)
13811 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13812 DAG.getConstant(1, DL, MVT::i64));
13813
13814 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13815}
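
// A worked instance of the rewrite (standalone sketch): for an unsigned i32
// 'a < b' with Size == 64, the borrow of the widened subtraction lands in the
// sign bit, so shifting right by Size - 1 yields the comparison result.
#include <cstdint>
static uint64_t ultViaSub(uint32_t A, uint32_t B) {
  uint64_t Sub = (uint64_t)A - (uint64_t)B; // zero extend both, then subtract
  return Sub >> 63;                         // 1 iff A < B
}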
13816
13817SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13818 DAGCombinerInfo &DCI) const {
13819 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13820
13821 SelectionDAG &DAG = DCI.DAG;
13822 SDLoc DL(N);
13823
13824 // The size of the integers being compared plays a critical role in the
13825 // following analysis, so we prefer to do this when all types are legal.
13826 if (!DCI.isAfterLegalizeDAG())
13827 return SDValue();
13828
13829 // If all users of SETCC extend its value to a legal integer type
13830 // then we replace SETCC with a subtraction
13831 for (const SDNode *U : N->uses())
13832 if (U->getOpcode() != ISD::ZERO_EXTEND)
13833 return SDValue();
13834
13835 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13836 auto OpSize = N->getOperand(0).getValueSizeInBits();
13837
13838 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13839
13840 if (OpSize < Size) {
13841 switch (CC) {
13842 default: break;
13843 case ISD::SETULT:
13844 return generateEquivalentSub(N, Size, false, false, DL, DAG);
13845 case ISD::SETULE:
13846 return generateEquivalentSub(N, Size, true, true, DL, DAG);
13847 case ISD::SETUGT:
13848 return generateEquivalentSub(N, Size, false, true, DL, DAG);
13849 case ISD::SETUGE:
13850 return generateEquivalentSub(N, Size, true, false, DL, DAG);
13851 }
13852 }
13853
13854 return SDValue();
13855}
13856
13857SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13858 DAGCombinerInfo &DCI) const {
13859 SelectionDAG &DAG = DCI.DAG;
13860 SDLoc dl(N);
13861
13862 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13863 // If we're tracking CR bits, we need to be careful that we don't have:
13864 // trunc(binary-ops(zext(x), zext(y)))
13865 // or
13866 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13867 // such that we're unnecessarily moving things into GPRs when it would be
13868 // better to keep them in CR bits.
13869
13870 // Note that trunc here can be an actual i1 trunc, or can be the effective
13871 // truncation that comes from a setcc or select_cc.
13872 if (N->getOpcode() == ISD::TRUNCATE &&
13873 N->getValueType(0) != MVT::i1)
13874 return SDValue();
13875
13876 if (N->getOperand(0).getValueType() != MVT::i32 &&
13877 N->getOperand(0).getValueType() != MVT::i64)
13878 return SDValue();
13879
13880 if (N->getOpcode() == ISD::SETCC ||
13881 N->getOpcode() == ISD::SELECT_CC) {
13882 // If we're looking at a comparison, then we need to make sure that the
13883 // high bits (all except for the first) don't affect the result.
13884 ISD::CondCode CC =
13885 cast<CondCodeSDNode>(N->getOperand(
13886 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13887 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13888
13890 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13891 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13892 return SDValue();
13893 } else if (ISD::isUnsignedIntSetCC(CC)) {
13894 if (!DAG.MaskedValueIsZero(N->getOperand(0),
13895 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13896 !DAG.MaskedValueIsZero(N->getOperand(1),
13897 APInt::getHighBitsSet(OpBits, OpBits-1)))
13898 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13899 : SDValue());
13900 } else {
13901 // This is neither a signed nor an unsigned comparison, just make sure
13902 // that the high bits are equal.
13903 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13904 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13905
13906 // We don't really care about what is known about the first bit (if
13907 // anything), so pretend that it is known zero for both to ensure they can
13908 // be compared as constants.
13909 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
13910 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
13911
13912 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
13913 Op1Known.getConstant() != Op2Known.getConstant())
13914 return SDValue();
13915 }
13916 }
13917
13918 // We now know that the higher-order bits are irrelevant; we just need to
13919 // make sure that all of the intermediate operations are bit operations, and
13920 // all inputs are extensions.
13921 if (N->getOperand(0).getOpcode() != ISD::AND &&
13922 N->getOperand(0).getOpcode() != ISD::OR &&
13923 N->getOperand(0).getOpcode() != ISD::XOR &&
13924 N->getOperand(0).getOpcode() != ISD::SELECT &&
13925 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13926 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13927 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13928 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13929 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13930 return SDValue();
13931
13932 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13933 N->getOperand(1).getOpcode() != ISD::AND &&
13934 N->getOperand(1).getOpcode() != ISD::OR &&
13935 N->getOperand(1).getOpcode() != ISD::XOR &&
13936 N->getOperand(1).getOpcode() != ISD::SELECT &&
13937 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13938 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13939 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13940 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13941 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13942 return SDValue();
13943
13944 SmallVector<SDValue, 4> Inputs;
13945 SmallVector<SDValue, 8> BinOps, PromOps;
13946 SmallPtrSet<SDNode *, 16> Visited;
13947
13948 for (unsigned i = 0; i < 2; ++i) {
13949 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13950 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13951 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13952 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13953 isa<ConstantSDNode>(N->getOperand(i)))
13954 Inputs.push_back(N->getOperand(i));
13955 else
13956 BinOps.push_back(N->getOperand(i));
13957
13958 if (N->getOpcode() == ISD::TRUNCATE)
13959 break;
13960 }
13961
13962 // Visit all inputs, collect all binary operations (and, or, xor and
13963 // select) that are all fed by extensions.
13964 while (!BinOps.empty()) {
13965 SDValue BinOp = BinOps.pop_back_val();
13966
13967 if (!Visited.insert(BinOp.getNode()).second)
13968 continue;
13969
13970 PromOps.push_back(BinOp);
13971
13972 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13973 // The condition of the select is not promoted.
13974 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13975 continue;
13976 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13977 continue;
13978
13979 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13980 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13981 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13982 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13983 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13984 Inputs.push_back(BinOp.getOperand(i));
13985 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13986 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13987 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13988 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13989 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13990 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13991 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13992 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13993 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13994 BinOps.push_back(BinOp.getOperand(i));
13995 } else {
13996 // We have an input that is not an extension or another binary
13997 // operation; we'll abort this transformation.
13998 return SDValue();
13999 }
14000 }
14001 }
14002
14003 // Make sure that this is a self-contained cluster of operations (which
14004 // is not quite the same thing as saying that everything has only one
14005 // use).
14006 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14007 if (isa<ConstantSDNode>(Inputs[i]))
14008 continue;
14009
14010 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14011 if (User != N && !Visited.count(User))
14012 return SDValue();
14013
14014 // Make sure that we're not going to promote the non-output-value
14015 // operand(s) or SELECT or SELECT_CC.
14016 // FIXME: Although we could sometimes handle this, and it does occur in
14017 // practice that one of the condition inputs to the select is also one of
14018 // the outputs, we currently can't deal with this.
14019 if (User->getOpcode() == ISD::SELECT) {
14020 if (User->getOperand(0) == Inputs[i])
14021 return SDValue();
14022 } else if (User->getOpcode() == ISD::SELECT_CC) {
14023 if (User->getOperand(0) == Inputs[i] ||
14024 User->getOperand(1) == Inputs[i])
14025 return SDValue();
14026 }
14027 }
14028 }
14029
14030 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14031 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14032 if (User != N && !Visited.count(User))
14033 return SDValue();
14034
14035 // Make sure that we're not going to promote the non-output-value
14036 // operand(s) or SELECT or SELECT_CC.
14037 // FIXME: Although we could sometimes handle this, and it does occur in
14038 // practice that one of the condition inputs to the select is also one of
14039 // the outputs, we currently can't deal with this.
14040 if (User->getOpcode() == ISD::SELECT) {
14041 if (User->getOperand(0) == PromOps[i])
14042 return SDValue();
14043 } else if (User->getOpcode() == ISD::SELECT_CC) {
14044 if (User->getOperand(0) == PromOps[i] ||
14045 User->getOperand(1) == PromOps[i])
14046 return SDValue();
14047 }
14048 }
14049 }
14050
14051 // Replace all inputs with the extension operand.
14052 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14053 // Constants may have users outside the cluster of to-be-promoted nodes,
14054 // and so we need to replace those as we do the promotions.
14055 if (isa<ConstantSDNode>(Inputs[i]))
14056 continue;
14057 else
14058 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14059 }
14060
14061 std::list<HandleSDNode> PromOpHandles;
14062 for (auto &PromOp : PromOps)
14063 PromOpHandles.emplace_back(PromOp);
14064
14065 // Replace all operations (these are all the same, but have a different
14066 // (i1) return type). DAG.getNode will validate that the types of
14067 // a binary operator match, so go through the list in reverse so that
14068 // we've likely promoted both operands first. Any intermediate truncations or
14069 // extensions disappear.
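  // For example (a sketch): with i1 values a and b,
  //   (trunc i1 (xor i32 (zext a), (zext b)))
  // is rebuilt as (xor i1 a, b), and the intermediate zexts and the final
  // truncation disappear.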
14070 while (!PromOpHandles.empty()) {
14071 SDValue PromOp = PromOpHandles.back().getValue();
14072 PromOpHandles.pop_back();
14073
14074 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14075 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14076 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14077 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14078 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14079 PromOp.getOperand(0).getValueType() != MVT::i1) {
14080 // The operand is not yet ready (see comment below).
14081 PromOpHandles.emplace_front(PromOp);
14082 continue;
14083 }
14084
14085 SDValue RepValue = PromOp.getOperand(0);
14086 if (isa<ConstantSDNode>(RepValue))
14087 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14088
14089 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14090 continue;
14091 }
14092
14093 unsigned C;
14094 switch (PromOp.getOpcode()) {
14095 default: C = 0; break;
14096 case ISD::SELECT: C = 1; break;
14097 case ISD::SELECT_CC: C = 2; break;
14098 }
14099
14100 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14101 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14102 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14103 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14104 // The to-be-promoted operands of this node have not yet been
14105 // promoted (this should be rare because we're going through the
14106 // list backward, but if one of the operands has several users in
14107 // this cluster of to-be-promoted nodes, it is possible).
14108 PromOpHandles.emplace_front(PromOp);
14109 continue;
14110 }
14111
14112     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14113                                 PromOp.getNode()->op_end());
14114
14115 // If there are any constant inputs, make sure they're replaced now.
14116 for (unsigned i = 0; i < 2; ++i)
14117 if (isa<ConstantSDNode>(Ops[C+i]))
14118 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14119
14120 DAG.ReplaceAllUsesOfValueWith(PromOp,
14121 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14122 }
14123
14124 // Now we're left with the initial truncation itself.
14125 if (N->getOpcode() == ISD::TRUNCATE)
14126 return N->getOperand(0);
14127
14128 // Otherwise, this is a comparison. The operands to be compared have just
14129 // changed type (to i1), but everything else is the same.
14130 return SDValue(N, 0);
14131}
14132
14133SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14134 DAGCombinerInfo &DCI) const {
14135 SelectionDAG &DAG = DCI.DAG;
14136 SDLoc dl(N);
14137
14138 // If we're tracking CR bits, we need to be careful that we don't have:
14139 // zext(binary-ops(trunc(x), trunc(y)))
14140 // or
14141 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14142 // such that we're unnecessarily moving things into CR bits that can more
14143 // efficiently stay in GPRs. Note that if we're not certain that the high
14144 // bits are set as required by the final extension, we still may need to do
14145 // some masking to get the proper behavior.
14146
14147 // This same functionality is important on PPC64 when dealing with
14148 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14149 // the return values of functions. Because it is so similar, it is handled
14150 // here as well.
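  // For example, (zext i64 (and i32 (trunc i32 x:i64), (trunc i32 y:i64)))
  // can be rebuilt as (and i64 x, y) outright when the high bits already
  // satisfy the extension (see ReallyNeedsExt below).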
14151
14152 if (N->getValueType(0) != MVT::i32 &&
14153 N->getValueType(0) != MVT::i64)
14154 return SDValue();
14155
14156 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14157 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14158 return SDValue();
14159
14160 if (N->getOperand(0).getOpcode() != ISD::AND &&
14161 N->getOperand(0).getOpcode() != ISD::OR &&
14162 N->getOperand(0).getOpcode() != ISD::XOR &&
14163 N->getOperand(0).getOpcode() != ISD::SELECT &&
14164 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14165 return SDValue();
14166
14167   SmallVector<SDValue, 4> Inputs;
14168   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14169   SmallPtrSet<SDNode *, 16> Visited;
14170
14171 // Visit all inputs, collect all binary operations (and, or, xor and
14172 // select) that are all fed by truncations.
14173 while (!BinOps.empty()) {
14174 SDValue BinOp = BinOps.pop_back_val();
14175
14176 if (!Visited.insert(BinOp.getNode()).second)
14177 continue;
14178
14179 PromOps.push_back(BinOp);
14180
14181 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14182 // The condition of the select is not promoted.
14183 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14184 continue;
14185 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14186 continue;
14187
14188 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14189 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14190 Inputs.push_back(BinOp.getOperand(i));
14191 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14192 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14193 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14194 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14195 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14196 BinOps.push_back(BinOp.getOperand(i));
14197 } else {
14198 // We have an input that is not a truncation or another binary
14199 // operation; we'll abort this transformation.
14200 return SDValue();
14201 }
14202 }
14203 }
14204
14205 // The operands of a select that must be truncated when the select is
14206 // promoted because the operand is actually part of the to-be-promoted set.
14207 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14208
14209 // Make sure that this is a self-contained cluster of operations (which
14210 // is not quite the same thing as saying that everything has only one
14211 // use).
14212 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14213 if (isa<ConstantSDNode>(Inputs[i]))
14214 continue;
14215
14216 for (SDNode *User : Inputs[i].getNode()->uses()) {
14217 if (User != N && !Visited.count(User))
14218 return SDValue();
14219
14220       // If we're going to promote the non-output-value operand(s) of SELECT
14221       // or SELECT_CC, record them for truncation.
14222 if (User->getOpcode() == ISD::SELECT) {
14223 if (User->getOperand(0) == Inputs[i])
14224 SelectTruncOp[0].insert(std::make_pair(User,
14225 User->getOperand(0).getValueType()));
14226 } else if (User->getOpcode() == ISD::SELECT_CC) {
14227 if (User->getOperand(0) == Inputs[i])
14228 SelectTruncOp[0].insert(std::make_pair(User,
14229 User->getOperand(0).getValueType()));
14230 if (User->getOperand(1) == Inputs[i])
14231 SelectTruncOp[1].insert(std::make_pair(User,
14232 User->getOperand(1).getValueType()));
14233 }
14234 }
14235 }
14236
14237 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14238 for (SDNode *User : PromOps[i].getNode()->uses()) {
14239 if (User != N && !Visited.count(User))
14240 return SDValue();
14241
14242       // If we're going to promote the non-output-value operand(s) of SELECT
14243       // or SELECT_CC, record them for truncation.
14244 if (User->getOpcode() == ISD::SELECT) {
14245 if (User->getOperand(0) == PromOps[i])
14246 SelectTruncOp[0].insert(std::make_pair(User,
14247 User->getOperand(0).getValueType()));
14248 } else if (User->getOpcode() == ISD::SELECT_CC) {
14249 if (User->getOperand(0) == PromOps[i])
14250 SelectTruncOp[0].insert(std::make_pair(User,
14251 User->getOperand(0).getValueType()));
14252 if (User->getOperand(1) == PromOps[i])
14253 SelectTruncOp[1].insert(std::make_pair(User,
14254 User->getOperand(1).getValueType()));
14255 }
14256 }
14257 }
14258
14259 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14260 bool ReallyNeedsExt = false;
14261 if (N->getOpcode() != ISD::ANY_EXTEND) {
14262     // If not all of the inputs are already sign/zero-extended, we'll still
14263     // need to do that at the end.
14264 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14265 if (isa<ConstantSDNode>(Inputs[i]))
14266 continue;
14267
14268 unsigned OpBits =
14269 Inputs[i].getOperand(0).getValueSizeInBits();
14270 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14271
14272 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14273 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14274 APInt::getHighBitsSet(OpBits,
14275 OpBits-PromBits))) ||
14276 (N->getOpcode() == ISD::SIGN_EXTEND &&
14277 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14278 (OpBits-(PromBits-1)))) {
14279 ReallyNeedsExt = true;
14280 break;
14281 }
14282 }
14283 }
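  // e.g. for (zext i64 (and i32 (trunc x:i64), (trunc y:i64))), the promotion
  // is only free if the top 32 bits of x and y are known zero; otherwise
  // ReallyNeedsExt stays set and the mask (or, for sign extension, the shift
  // pair) at the bottom of this function is still emitted.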
14284
14285 // Replace all inputs, either with the truncation operand, or a
14286 // truncation or extension to the final output type.
14287 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14288 // Constant inputs need to be replaced with the to-be-promoted nodes that
14289 // use them because they might have users outside of the cluster of
14290 // promoted nodes.
14291 if (isa<ConstantSDNode>(Inputs[i]))
14292 continue;
14293
14294 SDValue InSrc = Inputs[i].getOperand(0);
14295 if (Inputs[i].getValueType() == N->getValueType(0))
14296 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14297 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14298 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14299 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14300 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14301 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14302 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14303 else
14304 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14305 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14306 }
14307
14308 std::list<HandleSDNode> PromOpHandles;
14309 for (auto &PromOp : PromOps)
14310 PromOpHandles.emplace_back(PromOp);
14311
14312 // Replace all operations (these are all the same, but have a different
14313 // (promoted) return type). DAG.getNode will validate that the types of
14314 // a binary operator match, so go through the list in reverse so that
14315 // we've likely promoted both operands first.
14316 while (!PromOpHandles.empty()) {
14317 SDValue PromOp = PromOpHandles.back().getValue();
14318 PromOpHandles.pop_back();
14319
14320 unsigned C;
14321 switch (PromOp.getOpcode()) {
14322 default: C = 0; break;
14323 case ISD::SELECT: C = 1; break;
14324 case ISD::SELECT_CC: C = 2; break;
14325 }
14326
14327 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14328 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14329 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14330 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14331 // The to-be-promoted operands of this node have not yet been
14332 // promoted (this should be rare because we're going through the
14333 // list backward, but if one of the operands has several users in
14334 // this cluster of to-be-promoted nodes, it is possible).
14335 PromOpHandles.emplace_front(PromOp);
14336 continue;
14337 }
14338
14339 // For SELECT and SELECT_CC nodes, we do a similar check for any
14340 // to-be-promoted comparison inputs.
14341 if (PromOp.getOpcode() == ISD::SELECT ||
14342 PromOp.getOpcode() == ISD::SELECT_CC) {
14343 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14344 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14345 (SelectTruncOp[1].count(PromOp.getNode()) &&
14346 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14347 PromOpHandles.emplace_front(PromOp);
14348 continue;
14349 }
14350 }
14351
14352     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14353                                 PromOp.getNode()->op_end());
14354
14355 // If this node has constant inputs, then they'll need to be promoted here.
14356 for (unsigned i = 0; i < 2; ++i) {
14357 if (!isa<ConstantSDNode>(Ops[C+i]))
14358 continue;
14359 if (Ops[C+i].getValueType() == N->getValueType(0))
14360 continue;
14361
14362 if (N->getOpcode() == ISD::SIGN_EXTEND)
14363 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14364 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14365 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14366 else
14367 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14368 }
14369
14370 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14371 // truncate them again to the original value type.
14372 if (PromOp.getOpcode() == ISD::SELECT ||
14373 PromOp.getOpcode() == ISD::SELECT_CC) {
14374 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14375 if (SI0 != SelectTruncOp[0].end())
14376 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14377 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14378 if (SI1 != SelectTruncOp[1].end())
14379 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14380 }
14381
14382 DAG.ReplaceAllUsesOfValueWith(PromOp,
14383 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14384 }
14385
14386 // Now we're left with the initial extension itself.
14387 if (!ReallyNeedsExt)
14388 return N->getOperand(0);
14389
14390 // To zero extend, just mask off everything except for the first bit (in the
14391 // i1 case).
14392 if (N->getOpcode() == ISD::ZERO_EXTEND)
14393 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14394                        DAG.getConstant(APInt::getLowBitsSet(
14395                                          N->getValueSizeInBits(0), PromBits),
14396 dl, N->getValueType(0)));
14397
14398 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14399 "Invalid extension type");
14400 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14401 SDValue ShiftCst =
14402 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14403 return DAG.getNode(
14404 ISD::SRA, dl, N->getValueType(0),
14405 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14406 ShiftCst);
14407}
14408
14409SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14410 DAGCombinerInfo &DCI) const {
14411 assert(N->getOpcode() == ISD::SETCC &&
14412 "Should be called with a SETCC node");
14413
14414 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14415 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14416 SDValue LHS = N->getOperand(0);
14417 SDValue RHS = N->getOperand(1);
14418
14419 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14420 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14421 LHS.hasOneUse())
14422 std::swap(LHS, RHS);
14423
14424 // x == 0-y --> x+y == 0
14425 // x != 0-y --> x+y != 0
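    // An IR-level sketch of this rewrite:
    //   %n = sub i32 0, %y
    //   %c = icmp eq i32 %x, %n
    // becomes
    //   %a = add i32 %x, %y
    //   %c = icmp eq i32 %a, 0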
14426 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14427 RHS.hasOneUse()) {
14428 SDLoc DL(N);
14429 SelectionDAG &DAG = DCI.DAG;
14430 EVT VT = N->getValueType(0);
14431 EVT OpVT = LHS.getValueType();
14432 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14433 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14434 }
14435 }
14436
14437 return DAGCombineTruncBoolExt(N, DCI);
14438}
14439
14440// Is this an extending load from an f32 to an f64?
14441static bool isFPExtLoad(SDValue Op) {
14442 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14443 return LD->getExtensionType() == ISD::EXTLOAD &&
14444 Op.getValueType() == MVT::f64;
14445 return false;
14446}
14447
14448 /// Reduces the number of fp-to-int conversions when building a vector.
14449///
14450/// If this vector is built out of floating to integer conversions,
14451/// transform it to a vector built out of floating point values followed by a
14452/// single floating to integer conversion of the vector.
14453/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14454/// becomes (fptosi (build_vector ($A, $B, ...)))
14455SDValue PPCTargetLowering::
14456combineElementTruncationToVectorTruncation(SDNode *N,
14457 DAGCombinerInfo &DCI) const {
14458 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14459 "Should be called with a BUILD_VECTOR node");
14460
14461 SelectionDAG &DAG = DCI.DAG;
14462 SDLoc dl(N);
14463
14464 SDValue FirstInput = N->getOperand(0);
14465 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14466 "The input operand must be an fp-to-int conversion.");
14467
14468 // This combine happens after legalization so the fp_to_[su]i nodes are
14469   // already converted to PPCISD nodes.
14470 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14471 if (FirstConversion == PPCISD::FCTIDZ ||
14472 FirstConversion == PPCISD::FCTIDUZ ||
14473 FirstConversion == PPCISD::FCTIWZ ||
14474 FirstConversion == PPCISD::FCTIWUZ) {
14475 bool IsSplat = true;
14476 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14477 FirstConversion == PPCISD::FCTIWUZ;
14478 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14479     SmallVector<SDValue, 4> Ops;
14480     EVT TargetVT = N->getValueType(0);
14481 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14482 SDValue NextOp = N->getOperand(i);
14483 if (NextOp.getOpcode() != PPCISD::MFVSR)
14484 return SDValue();
14485 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14486 if (NextConversion != FirstConversion)
14487 return SDValue();
14488 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14489 // This is not valid if the input was originally double precision. It is
14490 // also not profitable to do unless this is an extending load in which
14491 // case doing this combine will allow us to combine consecutive loads.
14492 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14493 return SDValue();
14494 if (N->getOperand(i) != FirstInput)
14495 IsSplat = false;
14496 }
14497
14498 // If this is a splat, we leave it as-is since there will be only a single
14499 // fp-to-int conversion followed by a splat of the integer. This is better
14500 // for 32-bit and smaller ints and neutral for 64-bit ints.
14501 if (IsSplat)
14502 return SDValue();
14503
14504 // Now that we know we have the right type of node, get its operands
14505 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14506 SDValue In = N->getOperand(i).getOperand(0);
14507 if (Is32Bit) {
14508 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14509 // here, we know that all inputs are extending loads so this is safe).
14510 if (In.isUndef())
14511 Ops.push_back(DAG.getUNDEF(SrcVT));
14512 else {
14513 SDValue Trunc =
14514 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14515 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14516 Ops.push_back(Trunc);
14517 }
14518 } else
14519 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14520 }
14521
14522 unsigned Opcode;
14523 if (FirstConversion == PPCISD::FCTIDZ ||
14524 FirstConversion == PPCISD::FCTIWZ)
14525 Opcode = ISD::FP_TO_SINT;
14526 else
14527 Opcode = ISD::FP_TO_UINT;
14528
14529 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14530 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14531 return DAG.getNode(Opcode, dl, TargetVT, BV);
14532 }
14533 return SDValue();
14534}
14535
14536/// Reduce the number of loads when building a vector.
14537///
14538/// Building a vector out of multiple loads can be converted to a load
14539/// of the vector type if the loads are consecutive. If the loads are
14540/// consecutive but in descending order, a shuffle is added at the end
14541/// to reorder the vector.
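/// For example (a sketch):
///   (v4i32 (build_vector (load a), (load a+4), (load a+8), (load a+12)))
/// becomes a single (v4i32 (load a)); if the same loads appear in descending
/// address order instead, the wide load is built from the lowest address and
/// a reversing vector_shuffle is appended.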
14542 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14543   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14544 "Should be called with a BUILD_VECTOR node");
14545
14546 SDLoc dl(N);
14547
14548   // Return early for non-byte-sized types, as they can't be consecutive.
14549 if (!N->getValueType(0).getVectorElementType().isByteSized())
14550 return SDValue();
14551
14552 bool InputsAreConsecutiveLoads = true;
14553 bool InputsAreReverseConsecutive = true;
14554 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14555 SDValue FirstInput = N->getOperand(0);
14556 bool IsRoundOfExtLoad = false;
14557 LoadSDNode *FirstLoad = nullptr;
14558
14559 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14560 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14561 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14562 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14563 }
14564 // Not a build vector of (possibly fp_rounded) loads.
14565 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14566 N->getNumOperands() == 1)
14567 return SDValue();
14568
14569 if (!IsRoundOfExtLoad)
14570 FirstLoad = cast<LoadSDNode>(FirstInput);
14571
14572   SmallVector<LoadSDNode *, 4> InputLoads;
14573   InputLoads.push_back(FirstLoad);
14574 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14575 // If any inputs are fp_round(extload), they all must be.
14576 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14577 return SDValue();
14578
14579 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14580 N->getOperand(i);
14581 if (NextInput.getOpcode() != ISD::LOAD)
14582 return SDValue();
14583
14584 SDValue PreviousInput =
14585 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14586 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14587 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14588
14589 // If any inputs are fp_round(extload), they all must be.
14590 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14591 return SDValue();
14592
14593 // We only care about regular loads. The PPC-specific load intrinsics
14594 // will not lead to a merge opportunity.
14595 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14596 InputsAreConsecutiveLoads = false;
14597 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14598 InputsAreReverseConsecutive = false;
14599
14600 // Exit early if the loads are neither consecutive nor reverse consecutive.
14601 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14602 return SDValue();
14603 InputLoads.push_back(LD2);
14604 }
14605
14606 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14607 "The loads cannot be both consecutive and reverse consecutive.");
14608
14609 SDValue WideLoad;
14610 SDValue ReturnSDVal;
14611 if (InputsAreConsecutiveLoads) {
14612 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14613 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14614 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14615 FirstLoad->getAlign());
14616 ReturnSDVal = WideLoad;
14617 } else if (InputsAreReverseConsecutive) {
14618 LoadSDNode *LastLoad = InputLoads.back();
14619 assert(LastLoad && "Input needs to be a LoadSDNode.");
14620 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14621 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14622 LastLoad->getAlign());
14623     SmallVector<int, 16> Ops;
14624     for (int i = N->getNumOperands() - 1; i >= 0; i--)
14625 Ops.push_back(i);
14626
14627 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14628 DAG.getUNDEF(N->getValueType(0)), Ops);
14629 } else
14630 return SDValue();
14631
14632 for (auto *LD : InputLoads)
14633 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14634 return ReturnSDVal;
14635}
14636
14637// This function adds the required vector_shuffle needed to get
14638// the elements of the vector extract in the correct position
14639// as specified by the CorrectElems encoding.
14640 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14641                                       SDValue Input, uint64_t Elems,
14642 uint64_t CorrectElems) {
14643 SDLoc dl(N);
14644
14645 unsigned NumElems = Input.getValueType().getVectorNumElements();
14646 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14647
14648 // Knowing the element indices being extracted from the original
14649 // vector and the order in which they're being inserted, just put
14650 // them at element indices required for the instruction.
14651 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14652 if (DAG.getDataLayout().isLittleEndian())
14653 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14654 else
14655 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14656 CorrectElems = CorrectElems >> 8;
14657 Elems = Elems >> 8;
14658 }
14659
14660 SDValue Shuffle =
14661 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14662 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14663
14664 EVT VT = N->getValueType(0);
14665 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14666
14667 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14668                                Input.getValueType().getVectorElementType(),
14669                                VT.getVectorNumElements());
14670   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14671 DAG.getValueType(ExtVT));
14672}
14673
14674// Look for build vector patterns where input operands come from sign
14675// extended vector_extract elements of specific indices. If the correct indices
14676// aren't used, add a vector shuffle to fix up the indices and create
14677// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14678// during instruction selection.
14679 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14680   // This array encodes the indices that the vector sign extend instructions
14681 // extract from when extending from one type to another for both BE and LE.
14682   // The right nibble of each byte corresponds to the LE indices,
14683   // and the left nibble of each byte corresponds to the BE indices.
14684 // For example: 0x3074B8FC byte->word
14685 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14686 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14687 // For example: 0x000070F8 byte->double word
14688 // For LE: the allowed indices are: 0x0,0x8
14689 // For BE: the allowed indices are: 0x7,0xF
14690 uint64_t TargetElems[] = {
14691 0x3074B8FC, // b->w
14692 0x000070F8, // b->d
14693 0x10325476, // h->w
14694 0x00003074, // h->d
14695 0x00001032, // w->d
14696 };
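  // Worked example (LE, byte -> word): extracts from byte indices 1,5,9,13
  // accumulate Elems = 0x0105090D, while the allowed LE encoding is
  // 0x3074B8FC & 0x0F0F0F0F = 0x0004080C; since they differ, a shuffle is
  // added that first moves bytes 1,5,9,13 into positions 0,4,8,12.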
14697
14698 uint64_t Elems = 0;
14699 int Index;
14700 SDValue Input;
14701
14702 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14703 if (!Op)
14704 return false;
14705 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14706 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14707 return false;
14708
14709 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14710 // of the right width.
14711 SDValue Extract = Op.getOperand(0);
14712 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14713 Extract = Extract.getOperand(0);
14714 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14715 return false;
14716
14717 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14718 if (!ExtOp)
14719 return false;
14720
14721 Index = ExtOp->getZExtValue();
14722 if (Input && Input != Extract.getOperand(0))
14723 return false;
14724
14725 if (!Input)
14726 Input = Extract.getOperand(0);
14727
14728 Elems = Elems << 8;
14729 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14730 Elems |= Index;
14731
14732 return true;
14733 };
14734
14735   // If the build vector operands aren't sign-extended vector extracts
14736   // of the same input vector, then return.
14737 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14738 if (!isSExtOfVecExtract(N->getOperand(i))) {
14739 return SDValue();
14740 }
14741 }
14742
14743   // If the vector extract indices are not correct, add the appropriate
14744 // vector_shuffle.
14745 int TgtElemArrayIdx;
14746 int InputSize = Input.getValueType().getScalarSizeInBits();
14747 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14748 if (InputSize + OutputSize == 40)
14749 TgtElemArrayIdx = 0;
14750 else if (InputSize + OutputSize == 72)
14751 TgtElemArrayIdx = 1;
14752 else if (InputSize + OutputSize == 48)
14753 TgtElemArrayIdx = 2;
14754 else if (InputSize + OutputSize == 80)
14755 TgtElemArrayIdx = 3;
14756 else if (InputSize + OutputSize == 96)
14757 TgtElemArrayIdx = 4;
14758 else
14759 return SDValue();
14760
14761 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14762 CorrectElems = DAG.getDataLayout().isLittleEndian()
14763 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14764 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14765 if (Elems != CorrectElems) {
14766 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14767 }
14768
14769 // Regular lowering will catch cases where a shuffle is not needed.
14770 return SDValue();
14771}
14772
14773// Look for the pattern of a load from a narrow width to i128, feeding
14774// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14775// (LXVRZX). This node represents a zero extending load that will be matched
14776// to the Load VSX Vector Rightmost instructions.
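// For example:
//   (v1i128 (build_vector (i128 (zextload i64 ptr))))
// becomes (v1i128 (LXVRZX ptr, 64)), where 64 is the loaded width in bits.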
14777 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14778   SDLoc DL(N);
14779
14780 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14781 if (N->getValueType(0) != MVT::v1i128)
14782 return SDValue();
14783
14784 SDValue Operand = N->getOperand(0);
14785 // Proceed with the transformation if the operand to the BUILD_VECTOR
14786 // is a load instruction.
14787 if (Operand.getOpcode() != ISD::LOAD)
14788 return SDValue();
14789
14790 auto *LD = cast<LoadSDNode>(Operand);
14791 EVT MemoryType = LD->getMemoryVT();
14792
14793   // This transformation is only valid if we are loading either a byte,
14794 // halfword, word, or doubleword.
14795 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14796 MemoryType == MVT::i32 || MemoryType == MVT::i64;
14797
14798 // Ensure that the load from the narrow width is being zero extended to i128.
14799 if (!ValidLDType ||
14800 (LD->getExtensionType() != ISD::ZEXTLOAD &&
14801 LD->getExtensionType() != ISD::EXTLOAD))
14802 return SDValue();
14803
14804 SDValue LoadOps[] = {
14805 LD->getChain(), LD->getBasePtr(),
14806 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14807
14808   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14809                                  DAG.getVTList(MVT::v1i128, MVT::Other),
14810 LoadOps, MemoryType, LD->getMemOperand());
14811}
14812
14813SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14814 DAGCombinerInfo &DCI) const {
14815 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14816 "Should be called with a BUILD_VECTOR node");
14817
14818 SelectionDAG &DAG = DCI.DAG;
14819 SDLoc dl(N);
14820
14821 if (!Subtarget.hasVSX())
14822 return SDValue();
14823
14824 // The target independent DAG combiner will leave a build_vector of
14825 // float-to-int conversions intact. We can generate MUCH better code for
14826 // a float-to-int conversion of a vector of floats.
14827 SDValue FirstInput = N->getOperand(0);
14828 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14829 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14830 if (Reduced)
14831 return Reduced;
14832 }
14833
14834 // If we're building a vector out of consecutive loads, just load that
14835 // vector type.
14836 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14837 if (Reduced)
14838 return Reduced;
14839
14840 // If we're building a vector out of extended elements from another vector
14841 // we have P9 vector integer extend instructions. The code assumes legal
14842 // input types (i.e. it can't handle things like v4i16) so do not run before
14843 // legalization.
14844 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14845 Reduced = combineBVOfVecSExt(N, DAG);
14846 if (Reduced)
14847 return Reduced;
14848 }
14849
14850 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14851 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14852 // is a load from <valid narrow width> to i128.
14853 if (Subtarget.isISA3_1()) {
14854 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14855 if (BVOfZLoad)
14856 return BVOfZLoad;
14857 }
14858
14859 if (N->getValueType(0) != MVT::v2f64)
14860 return SDValue();
14861
14862 // Looking for:
14863   // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
14864 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14865 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14866 return SDValue();
14867 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14868 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14869 return SDValue();
14870 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14871 return SDValue();
14872
14873 SDValue Ext1 = FirstInput.getOperand(0);
14874 SDValue Ext2 = N->getOperand(1).getOperand(0);
14875   if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14876       Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14877     return SDValue();
14878
14879 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14880 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14881 if (!Ext1Op || !Ext2Op)
14882 return SDValue();
14883 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14884 Ext1.getOperand(0) != Ext2.getOperand(0))
14885 return SDValue();
14886
14887 int FirstElem = Ext1Op->getZExtValue();
14888 int SecondElem = Ext2Op->getZExtValue();
14889 int SubvecIdx;
14890 if (FirstElem == 0 && SecondElem == 1)
14891 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14892 else if (FirstElem == 2 && SecondElem == 3)
14893 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14894 else
14895 return SDValue();
14896
14897 SDValue SrcVec = Ext1.getOperand(0);
14898 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14899     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14900   return DAG.getNode(NodeType, dl, MVT::v2f64,
14901 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14902}
14903
14904SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14905 DAGCombinerInfo &DCI) const {
14906 assert((N->getOpcode() == ISD::SINT_TO_FP ||
14907 N->getOpcode() == ISD::UINT_TO_FP) &&
14908 "Need an int -> FP conversion node here");
14909
14910 if (useSoftFloat() || !Subtarget.has64BitSupport())
14911 return SDValue();
14912
14913 SelectionDAG &DAG = DCI.DAG;
14914 SDLoc dl(N);
14915 SDValue Op(N, 0);
14916
14917 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14918 // from the hardware.
14919 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14920 return SDValue();
14921 if (!Op.getOperand(0).getValueType().isSimple())
14922 return SDValue();
14923 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14924 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14925 return SDValue();
14926
14927 SDValue FirstOperand(Op.getOperand(0));
14928 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14929 (FirstOperand.getValueType() == MVT::i8 ||
14930 FirstOperand.getValueType() == MVT::i16);
14931 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14932 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14933 bool DstDouble = Op.getValueType() == MVT::f64;
14934 unsigned ConvOp = Signed ?
14935 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14936 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14937 SDValue WidthConst =
14938 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14939 dl, false);
14940 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14941 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14942     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14943                                          DAG.getVTList(MVT::f64, MVT::Other),
14944 Ops, MVT::i8, LDN->getMemOperand());
14945
14946 // For signed conversion, we need to sign-extend the value in the VSR
14947 if (Signed) {
14948 SDValue ExtOps[] = { Ld, WidthConst };
14949 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14950 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14951 } else
14952 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14953 }
14954
14955
14956   // For i32 intermediate values, unfortunately, the conversion functions
14957   // leave the upper 32 bits of the value undefined. Within the set of
14958 // scalar instructions, we have no method for zero- or sign-extending the
14959 // value. Thus, we cannot handle i32 intermediate values here.
14960 if (Op.getOperand(0).getValueType() == MVT::i32)
14961 return SDValue();
14962
14963 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14964 "UINT_TO_FP is supported only with FPCVT");
14965
14966 // If we have FCFIDS, then use it when converting to single-precision.
14967 // Otherwise, convert to double-precision and then round.
14968 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14969 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14970                                                             : PPCISD::FCFIDS)
14971                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14972 : PPCISD::FCFID);
14973 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14974 ? MVT::f32
14975 : MVT::f64;
14976
14977   // If we're converting from a float to an int and back to a float again,
14978   // then we don't need the store/load pair at all.
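  // e.g. (f64 (sint_to_fp (fp_to_sint f64 x))) becomes FCFID(FCTIDZ(x)),
  // keeping the value in floating-point registers throughout.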
14979 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14980 Subtarget.hasFPCVT()) ||
14981 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14982 SDValue Src = Op.getOperand(0).getOperand(0);
14983 if (Src.getValueType() == MVT::f32) {
14984 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14985 DCI.AddToWorklist(Src.getNode());
14986 } else if (Src.getValueType() != MVT::f64) {
14987 // Make sure that we don't pick up a ppc_fp128 source value.
14988 return SDValue();
14989 }
14990
14991 unsigned FCTOp =
14992 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14993                                                         PPCISD::FCTIDUZ;
14994
14995 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14996 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14997
14998 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14999 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15000 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15001 DCI.AddToWorklist(FP.getNode());
15002 }
15003
15004 return FP;
15005 }
15006
15007 return SDValue();
15008}
15009
15010// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15011// builtins) into loads with swaps.
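// e.g. a little-endian (v4i32 (load p)) becomes
//   (bitcast v4i32 (XXSWAPD (LXVD2X p)))
// restoring the expected element order after the element-reversed load.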
15012 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15013                                               DAGCombinerInfo &DCI) const {
15014 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15015 // load combines.
15016 if (DCI.isBeforeLegalizeOps())
15017 return SDValue();
15018
15019 SelectionDAG &DAG = DCI.DAG;
15020 SDLoc dl(N);
15021 SDValue Chain;
15022 SDValue Base;
15023 MachineMemOperand *MMO;
15024
15025 switch (N->getOpcode()) {
15026 default:
15027 llvm_unreachable("Unexpected opcode for little endian VSX load");
15028 case ISD::LOAD: {
15029 LoadSDNode *LD = cast<LoadSDNode>(N);
15030 Chain = LD->getChain();
15031 Base = LD->getBasePtr();
15032 MMO = LD->getMemOperand();
15033 // If the MMO suggests this isn't a load of a full vector, leave
15034 // things alone. For a built-in, we have to make the change for
15035 // correctness, so if there is a size problem that will be a bug.
15036 if (MMO->getSize() < 16)
15037 return SDValue();
15038 break;
15039 }
15040   case ISD::INTRINSIC_W_CHAIN: {
15041     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15042 Chain = Intrin->getChain();
15043 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15044 // us what we want. Get operand 2 instead.
15045 Base = Intrin->getOperand(2);
15046 MMO = Intrin->getMemOperand();
15047 break;
15048 }
15049 }
15050
15051 MVT VecTy = N->getValueType(0).getSimpleVT();
15052
15053 SDValue LoadOps[] = { Chain, Base };
15054   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15055                                          DAG.getVTList(MVT::v2f64, MVT::Other),
15056 LoadOps, MVT::v2f64, MMO);
15057
15058 DCI.AddToWorklist(Load.getNode());
15059 Chain = Load.getValue(1);
15060 SDValue Swap = DAG.getNode(
15061 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15062 DCI.AddToWorklist(Swap.getNode());
15063
15064 // Add a bitcast if the resulting load type doesn't match v2f64.
15065 if (VecTy != MVT::v2f64) {
15066 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15067 DCI.AddToWorklist(N.getNode());
15068 // Package {bitcast value, swap's chain} to match Load's shape.
15069 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15070 N, Swap.getValue(1));
15071 }
15072
15073 return Swap;
15074}
15075
15076// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15077// builtins) into stores with swaps.
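// e.g. a little-endian (store (v4i32 x), p) becomes
//   (STXVD2X (XXSWAPD (bitcast v2f64 x)), p)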
15078 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15079                                                DAGCombinerInfo &DCI) const {
15080 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15081 // store combines.
15082 if (DCI.isBeforeLegalizeOps())
15083 return SDValue();
15084
15085 SelectionDAG &DAG = DCI.DAG;
15086 SDLoc dl(N);
15087 SDValue Chain;
15088 SDValue Base;
15089 unsigned SrcOpnd;
15090 MachineMemOperand *MMO;
15091
15092 switch (N->getOpcode()) {
15093 default:
15094 llvm_unreachable("Unexpected opcode for little endian VSX store");
15095 case ISD::STORE: {
15096 StoreSDNode *ST = cast<StoreSDNode>(N);
15097 Chain = ST->getChain();
15098 Base = ST->getBasePtr();
15099 MMO = ST->getMemOperand();
15100 SrcOpnd = 1;
15101 // If the MMO suggests this isn't a store of a full vector, leave
15102 // things alone. For a built-in, we have to make the change for
15103 // correctness, so if there is a size problem that will be a bug.
15104 if (MMO->getSize() < 16)
15105 return SDValue();
15106 break;
15107 }
15108 case ISD::INTRINSIC_VOID: {
15109 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15110 Chain = Intrin->getChain();
15111 // Intrin->getBasePtr() oddly does not get what we want.
15112 Base = Intrin->getOperand(3);
15113 MMO = Intrin->getMemOperand();
15114 SrcOpnd = 2;
15115 break;
15116 }
15117 }
15118
15119 SDValue Src = N->getOperand(SrcOpnd);
15120 MVT VecTy = Src.getValueType().getSimpleVT();
15121
15122 // All stores are done as v2f64 and possible bit cast.
15123 if (VecTy != MVT::v2f64) {
15124 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15125 DCI.AddToWorklist(Src.getNode());
15126 }
15127
15128 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15129 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15130 DCI.AddToWorklist(Swap.getNode());
15131 Chain = Swap.getValue(1);
15132 SDValue StoreOps[] = { Chain, Swap, Base };
15133   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15134                                           DAG.getVTList(MVT::Other),
15135 StoreOps, VecTy, MMO);
15136 DCI.AddToWorklist(Store.getNode());
15137 return Store;
15138}
15139
15140// Handle DAG combine for STORE (FP_TO_INT F).
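// e.g. (store (fp_to_sint f64 x), p) is replaced with a ST_VSR_SCAL_INT node
// that stores the converted value directly from the VSR, with the number of
// bytes to store carried as an operand.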
15141SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15142 DAGCombinerInfo &DCI) const {
15143 SelectionDAG &DAG = DCI.DAG;
15144 SDLoc dl(N);
15145 unsigned Opcode = N->getOperand(1).getOpcode();
15146 (void)Opcode;
15147 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15148
15149 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15150 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15151 && "Not a FP_TO_INT Instruction!");
15152
15153 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15154 EVT Op1VT = N->getOperand(1).getValueType();
15155 EVT ResVT = Val.getValueType();
15156
15157 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15158 return SDValue();
15159
15160 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15161 bool ValidTypeForStoreFltAsInt =
15162 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15163 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15164
15165 // TODO: Lower conversion from f128 on all VSX targets
15166 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15167 return SDValue();
15168
15169 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15170 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15171 return SDValue();
15172
15173 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15174
15175 // Set number of bytes being converted.
15176 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15177 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15178 DAG.getIntPtrConstant(ByteSize, dl, false),
15179 DAG.getValueType(Op1VT)};
15180
15181   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15182                                 DAG.getVTList(MVT::Other), Ops,
15183 cast<StoreSDNode>(N)->getMemoryVT(),
15184 cast<StoreSDNode>(N)->getMemOperand());
15185
15186 return Val;
15187}
15188
15189static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15190 // Check that the source of the element keeps flipping
15191   // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
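  // e.g. for NumElts = 8, mask <0,8,1,9,2,10,3,11> alternates between the
  // two source vectors and is accepted; <0,1,8,9,2,3,10,11> is not.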
15192 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15193 for (int i = 1, e = Mask.size(); i < e; i++) {
15194 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15195 return false;
15196 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15197 return false;
15198 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15199 }
15200 return true;
15201}
15202
15203static bool isSplatBV(SDValue Op) {
15204 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15205 return false;
15206 SDValue FirstOp;
15207
15208 // Find first non-undef input.
15209 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15210 FirstOp = Op.getOperand(i);
15211 if (!FirstOp.isUndef())
15212 break;
15213 }
15214
15215 // All inputs are undef or the same as the first non-undef input.
15216 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15217 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15218 return false;
15219 return true;
15220}
15221
15222 static SDValue isScalarToVec(SDValue Op) {
15223   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15224 return Op;
15225 if (Op.getOpcode() != ISD::BITCAST)
15226 return SDValue();
15227 Op = Op.getOperand(0);
15228 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15229 return Op;
15230 return SDValue();
15231}
15232
15233// Fix up the shuffle mask to account for the fact that the result of
15234// scalar_to_vector is not in lane zero. This just takes all values in
15235// the ranges specified by the min/max indices and adds the number of
15236// elements required to ensure each element comes from the respective
15237// position in the valid lane.
15238// On little endian, that's just the corresponding element in the other
15239// half of the vector. On big endian, it is in the same half but right
15240// justified rather than left justified in that half.
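// For example, for v4i32 (HalfVec = 2) with ValidLaneWidth = 1, a mask entry
// of 0 that refers to a permuted input becomes 0 + HalfVec = 2 on little
// endian, and 0 + HalfVec - ValidLaneWidth = 1 on big endian.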
15241 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15242                                             int LHSMaxIdx, int RHSMinIdx,
15243 int RHSMaxIdx, int HalfVec,
15244 unsigned ValidLaneWidth,
15245 const PPCSubtarget &Subtarget) {
15246 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15247 int Idx = ShuffV[i];
15248 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15249 ShuffV[i] +=
15250 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15251 }
15252}
15253
15254// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15255// the original is:
15256// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15257// In such a case, just change the shuffle mask to extract the element
15258// from the permuted index.
15259 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15260                                const PPCSubtarget &Subtarget) {
15261 SDLoc dl(OrigSToV);
15262 EVT VT = OrigSToV.getValueType();
15263 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15264 "Expecting a SCALAR_TO_VECTOR here");
15265 SDValue Input = OrigSToV.getOperand(0);
15266
15267 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15268 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15269 SDValue OrigVector = Input.getOperand(0);
15270
15271 // Can't handle non-const element indices or different vector types
15272 // for the input to the extract and the output of the scalar_to_vector.
15273 if (Idx && VT == OrigVector.getValueType()) {
15274 unsigned NumElts = VT.getVectorNumElements();
15275 assert(
15276 NumElts > 1 &&
15277 "Cannot produce a permuted scalar_to_vector for one element vector");
15278 SmallVector<int, 16> NewMask(NumElts, -1);
15279 unsigned ResultInElt = NumElts / 2;
15280 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15281 NewMask[ResultInElt] = Idx->getZExtValue();
15282 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15283 }
15284 }
15285 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15286 OrigSToV.getOperand(0));
15287}
15288
15289// On little endian subtargets, combine shuffles such as:
15290// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15291// into:
15292// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15293// because the latter can be matched to a single instruction merge.
15294// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15295// to put the value into element zero. Adjust the shuffle mask so that the
15296// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15297// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15298// nodes with elements smaller than doubleword because all the ways
15299// of getting scalar data into a vector register put the value in the
15300// rightmost element of the left half of the vector.
15301SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15302 SelectionDAG &DAG) const {
15303 SDValue LHS = SVN->getOperand(0);
15304 SDValue RHS = SVN->getOperand(1);
15305 auto Mask = SVN->getMask();
15306 int NumElts = LHS.getValueType().getVectorNumElements();
15307 SDValue Res(SVN, 0);
15308 SDLoc dl(SVN);
15309 bool IsLittleEndian = Subtarget.isLittleEndian();
15310
15311 // On big endian targets this is only useful for subtargets with direct moves.
15312 // On little endian targets it would be useful for all subtargets with VSX.
15313 // However adding special handling for LE subtargets without direct moves
15314 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15315 // which includes direct moves.
15316 if (!Subtarget.hasDirectMove())
15317 return Res;
15318
15319 // If this is not a shuffle of a shuffle and the first element comes from
15320 // the second vector, canonicalize to the commuted form. This will make it
15321 // more likely to match one of the single instruction patterns.
15322 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15323 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15324 std::swap(LHS, RHS);
15325 Res = DAG.getCommutedVectorShuffle(*SVN);
15326 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15327 }
15328
15329 // Adjust the shuffle mask if either input vector comes from a
15330 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15331 // form (to prevent the need for a swap).
15332 SmallVector<int, 16> ShuffV(Mask);
15333 SDValue SToVLHS = isScalarToVec(LHS);
15334 SDValue SToVRHS = isScalarToVec(RHS);
15335 if (SToVLHS || SToVRHS) {
15336 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15337 // same type and have differing element sizes, then do not perform
15338 // the following transformation. The current transformation for
15339 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15340 // element size. This will be updated in the future to account for
15341 // differing sizes of the LHS and RHS.
15342 if (SToVLHS && SToVRHS &&
15343 (SToVLHS.getValueType().getScalarSizeInBits() !=
15344 SToVRHS.getValueType().getScalarSizeInBits()))
15345 return Res;
15346
15347 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15348 : SToVRHS.getValueType().getVectorNumElements();
15349 int NumEltsOut = ShuffV.size();
15350 // The width of the "valid lane" (i.e. the lane that contains the value that
15351 // is vectorized) needs to be expressed in terms of the number of elements
15352     // of the shuffle. It is therefore the ratio of the scalar element sizes
15353     // before and after any bitcast.
15354 unsigned ValidLaneWidth =
15355 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15356 LHS.getValueType().getScalarSizeInBits()
15357 : SToVRHS.getValueType().getScalarSizeInBits() /
15358 RHS.getValueType().getScalarSizeInBits();
15359
15360 // Initially assume that neither input is permuted. These will be adjusted
15361 // accordingly if either input is.
15362 int LHSMaxIdx = -1;
15363 int RHSMinIdx = -1;
15364 int RHSMaxIdx = -1;
15365 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15366
15367 // Get the permuted scalar to vector nodes for the source(s) that come from
15368 // ISD::SCALAR_TO_VECTOR.
15369 // On big endian systems, this only makes sense for element sizes smaller
15370 // than 64 bits since for 64-bit elements, all instructions already put
15371     // the value into element zero. Since the scalar sizes of LHS and RHS may
15372     // differ after isScalarToVec, this should be checked using their own sizes.
15373 if (SToVLHS) {
15374 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15375 return Res;
15376 // Set up the values for the shuffle vector fixup.
15377 LHSMaxIdx = NumEltsOut / NumEltsIn;
15378 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15379 if (SToVLHS.getValueType() != LHS.getValueType())
15380 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15381 LHS = SToVLHS;
15382 }
15383 if (SToVRHS) {
15384 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15385 return Res;
15386 RHSMinIdx = NumEltsOut;
15387 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15388 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15389 if (SToVRHS.getValueType() != RHS.getValueType())
15390 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15391 RHS = SToVRHS;
15392 }
15393
15394 // Fix up the shuffle mask to reflect where the desired element actually is.
15395 // The minimum and maximum indices that correspond to element zero for both
15396 // the LHS and RHS are computed and will control which shuffle mask entries
15397 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15398 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15399 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15400 HalfVec, ValidLaneWidth, Subtarget);
15401 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15402
15403 // We may have simplified away the shuffle. We won't be able to do anything
15404 // further with it here.
15405 if (!isa<ShuffleVectorSDNode>(Res))
15406 return Res;
15407 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15408 }
15409
15410 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15411 // The common case after we commuted the shuffle is that the RHS is a splat
15412 // and we have elements coming in from the splat at indices that are not
15413 // conducive to using a merge.
15414 // Example:
15415 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15416 if (!isSplatBV(TheSplat))
15417 return Res;
15418
15419 // We are looking for a mask such that all even elements are from
15420 // one vector and all odd elements from the other.
15421 if (!isAlternatingShuffMask(Mask, NumElts))
15422 return Res;
15423
15424 // Adjust the mask so we are pulling in the same index from the splat
15425 // as the index from the interesting vector in consecutive elements.
15426 if (IsLittleEndian) {
15427 // Example (even elements from first vector):
15428 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15429 if (Mask[0] < NumElts)
15430 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15431 if (ShuffV[i] < 0)
15432 continue;
15433 // If element from non-splat is undef, pick first element from splat.
15434 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15435 }
15436 // Example (odd elements from first vector):
15437 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15438 else
15439 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15440 if (ShuffV[i] < 0)
15441 continue;
15442 // If element from non-splat is undef, pick first element from splat.
15443 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15444 }
15445 } else {
15446 // Example (even elements from first vector):
15447 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15448 if (Mask[0] < NumElts)
15449 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15450 if (ShuffV[i] < 0)
15451 continue;
15452 // If element from non-splat is undef, pick first element from splat.
15453 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15454 }
15455 // Example (odd elements from first vector):
15456 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15457 else
15458 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15459 if (ShuffV[i] < 0)
15460 continue;
15461 // If element from non-splat is undef, pick first element from splat.
15462 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15463 }
15464 }
15465
15466 // If the RHS has undefs, we need to remove them since we may have created
15467 // a shuffle that adds those instead of the splat value.
15468 SDValue SplatVal =
15469 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15470 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15471
15472 if (IsLittleEndian)
15473 RHS = TheSplat;
15474 else
15475 LHS = TheSplat;
15476 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15477}
15478
15479SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15480 LSBaseSDNode *LSBase,
15481 DAGCombinerInfo &DCI) const {
15482 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15483 "Not a reverse memop pattern!");
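  // e.g. (vector_shuffle<3,2,1,0> (v4i32 (load p))) is replaced here by a
  // single PPCISD::LOAD_VEC_BE; the analogous store pattern becomes
  // PPCISD::STORE_VEC_BE.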
15484
15485 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15486 auto Mask = SVN->getMask();
15487 int i = 0;
15488 auto I = Mask.rbegin();
15489 auto E = Mask.rend();
15490
15491 for (; I != E; ++I) {
15492 if (*I != i)
15493 return false;
15494 i++;
15495 }
15496 return true;
15497 };
15498
15499 SelectionDAG &DAG = DCI.DAG;
15500 EVT VT = SVN->getValueType(0);
15501
15502 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15503 return SDValue();
15504
15505   // Before P9, the PPCVSXSwapRemoval pass hacks the element order instead
15506   // (see the comment in PPCVSXSwapRemoval.cpp). This transformation would
15507   // conflict with that optimization, so we don't do it here.
15508 if (!Subtarget.hasP9Vector())
15509 return SDValue();
15510
15511 if (!IsElementReverse(SVN))
15512 return SDValue();
15513
15514 if (LSBase->getOpcode() == ISD::LOAD) {
15515 // If result 0 of the load has any user other than the
15516 // shufflevector instruction, it is not profitable to replace the
15517 // shufflevector with a reverse load.
15518 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15519 UI != UE; ++UI)
15520 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15521 return SDValue();
15522
15523 SDLoc dl(LSBase);
15524 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15525 return DAG.getMemIntrinsicNode(
15526 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15527 LSBase->getMemoryVT(), LSBase->getMemOperand());
15528 }
15529
15530 if (LSBase->getOpcode() == ISD::STORE) {
15531 // If there are other uses of the shuffle, the swap cannot be avoided.
15532 // Forcing the use of an X-Form (since swapped stores only have
15533 // X-Forms) without removing the swap is unprofitable.
15534 if (!SVN->hasOneUse())
15535 return SDValue();
15536
15537 SDLoc dl(LSBase);
15538 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15539 LSBase->getBasePtr()};
15540 return DAG.getMemIntrinsicNode(
15541 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15542 LSBase->getMemoryVT(), LSBase->getMemOperand());
15543 }
15544
15545 llvm_unreachable("Expected a load or store node here");
15546}
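
A hedged sketch of source that can reach this combine on a little-endian P9 target (the clang-specific __builtin_shufflevector merely spells the reverse mask):

    #include <altivec.h>
    vector int ReverseLoad(const vector int *P) {
      vector int V = *P; // normal vector load
      // With the load's value used only by the reverse shuffle, the pair can
      // be replaced by a single byte-order-correct PPCISD::LOAD_VEC_BE load.
      return __builtin_shufflevector(V, V, 3, 2, 1, 0);
    }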
15547
15548static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15549 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15550 if (IntrinsicID == Intrinsic::ppc_stdcx)
15551 StoreWidth = 8;
15552 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15553 StoreWidth = 4;
15554 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15555 StoreWidth = 2;
15556 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15557 StoreWidth = 1;
15558 else
15559 return false;
15560 return true;
15561}
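
For context, a minimal sketch of code that produces these store-conditional intrinsics, assuming clang's XL-compatible __builtin_ppc_lwarx/__builtin_ppc_stwcx builtins; comparing the stwcx. result against 0/1 is the shape the BR_CC combine below rewrites into a direct CR0 branch:

    int TryStore(volatile int *P, int Old, int New) {
      if (__builtin_ppc_lwarx(P) != Old) // load word and reserve
        return 0;
      // stwcx. stores only if the reservation still holds; nonzero on success.
      return __builtin_ppc_stwcx(P, New) != 0;
    }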
15562
15563SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15564 DAGCombinerInfo &DCI) const {
15565 SelectionDAG &DAG = DCI.DAG;
15566 SDLoc dl(N);
15567 switch (N->getOpcode()) {
15568 default: break;
15569 case ISD::ADD:
15570 return combineADD(N, DCI);
15571 case ISD::AND: {
15572 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15573 // original input as that will prevent us from selecting optimal rotates.
15574 // This only matters if the input to the extend is i32 widened to i64.
15575 SDValue Op1 = N->getOperand(0);
15576 SDValue Op2 = N->getOperand(1);
15577 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15578 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15579 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15580 Op1.getOperand(0).getValueType() != MVT::i32)
15581 break;
15582 SDValue NarrowOp = Op1.getOperand(0);
15583 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15584 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15585 break;
15586
15587 uint64_t Imm = Op2->getAsZExtVal();
15588 // Make sure that the constant is narrow enough to fit in the narrow type.
15589 if (!isUInt<32>(Imm))
15590 break;
15591 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15592 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15593 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15594 }
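
A hedged example of source that can produce this (and (zext (rot)), C) shape; moving the mask onto the i32 rotate keeps a single rlwinm selectable:

    unsigned long long RotateMask(unsigned X) {
      unsigned R = (X << 8) | (X >> 24);      // recognized as a 32-bit ROTL
      return (unsigned long long)R & 0xFFFFu; // zext to i64, then AND
    }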
15595 case ISD::SHL:
15596 return combineSHL(N, DCI);
15597 case ISD::SRA:
15598 return combineSRA(N, DCI);
15599 case ISD::SRL:
15600 return combineSRL(N, DCI);
15601 case ISD::MUL:
15602 return combineMUL(N, DCI);
15603 case ISD::FMA:
15604 case PPCISD::FNMSUB:
15605 return combineFMALike(N, DCI);
15606 case PPCISD::SHL:
15607 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15608 return N->getOperand(0);
15609 break;
15610 case PPCISD::SRL:
15611 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15612 return N->getOperand(0);
15613 break;
15614 case PPCISD::SRA:
15615 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15616 if (C->isZero() || // 0 >>s V -> 0.
15617 C->isAllOnes()) // -1 >>s V -> -1.
15618 return N->getOperand(0);
15619 }
15620 break;
15621 case ISD::SIGN_EXTEND:
15622 case ISD::ZERO_EXTEND:
15623 case ISD::ANY_EXTEND:
15624 return DAGCombineExtBoolTrunc(N, DCI);
15625 case ISD::TRUNCATE:
15626 return combineTRUNCATE(N, DCI);
15627 case ISD::SETCC:
15628 if (SDValue CSCC = combineSetCC(N, DCI))
15629 return CSCC;
15630 [[fallthrough]];
15631 case ISD::SELECT_CC:
15632 return DAGCombineTruncBoolExt(N, DCI);
15633 case ISD::SINT_TO_FP:
15634 case ISD::UINT_TO_FP:
15635 return combineFPToIntToFP(N, DCI);
15636 case ISD::VECTOR_SHUFFLE:
15637 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15638 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15639 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15640 }
15641 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15642 case ISD::STORE: {
15643
15644 EVT Op1VT = N->getOperand(1).getValueType();
15645 unsigned Opcode = N->getOperand(1).getOpcode();
15646
15647 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15648 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15649 SDValue Val = combineStoreFPToInt(N, DCI);
15650 if (Val)
15651 return Val;
15652 }
15653
15654 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15655 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15656 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15657 if (Val)
15658 return Val;
15659 }
15660
15661 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15662 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15663 N->getOperand(1).getNode()->hasOneUse() &&
15664 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15665 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15666
15667 // STBRX can only handle simple types and it makes no sense to store fewer
15668 // than two bytes in byte-reversed order.
15669 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15670 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15671 break;
15672
15673 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15674 // Do an any-extend to 32-bits if this is a half-word input.
15675 if (BSwapOp.getValueType() == MVT::i16)
15676 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15677
15678 // If the type of the BSWAP operand is wider than the stored memory width,
15679 // it needs to be shifted to the right side before STBRX.
15680 if (Op1VT.bitsGT(mVT)) {
15681 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15682 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15683 DAG.getConstant(Shift, dl, MVT::i32));
15684 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15685 if (Op1VT == MVT::i64)
15686 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15687 }
15688
15689 SDValue Ops[] = {
15690 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15691 };
15692 return
15693 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15694 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15695 cast<StoreSDNode>(N)->getMemOperand());
15696 }
15697
15698 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15699 // to increase the chance of CSE'ing the constant materialization.
15700 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15701 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15702 // We need to sign-extend to 64 bits to handle negative values.
15703 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15704 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15705 MemVT.getSizeInBits());
15706 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15707
15708 // DAG.getTruncStore() can't be used here because it doesn't accept
15709 // the general (base + offset) addressing mode.
15710 // So we use UpdateNodeOperands and setTruncatingStore instead.
15711 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15712 N->getOperand(3));
15713 cast<StoreSDNode>(N)->setTruncatingStore(true);
15714 return SDValue(N, 0);
15715 }
15716
15717 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15718 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15719 if (Op1VT.isSimple()) {
15720 MVT StoreVT = Op1VT.getSimpleVT();
15721 if (Subtarget.needsSwapsForVSXMemOps() &&
15722 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15723 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15724 return expandVSXStoreForLE(N, DCI);
15725 }
15726 break;
15727 }
15728 case ISD::LOAD: {
15729 LoadSDNode *LD = cast<LoadSDNode>(N);
15730 EVT VT = LD->getValueType(0);
15731
15732 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15733 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15734 if (VT.isSimple()) {
15735 MVT LoadVT = VT.getSimpleVT();
15736 if (Subtarget.needsSwapsForVSXMemOps() &&
15737 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15738 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15739 return expandVSXLoadForLE(N, DCI);
15740 }
15741
15742 // We sometimes end up with a 64-bit integer load, from which we extract
15743 // two single-precision floating-point numbers. This happens with
15744 // std::complex<float>, and other similar structures, because of the way we
15745 // canonicalize structure copies. However, if we lack direct moves,
15746 // then the final bitcasts from the extracted integer values to the
15747 // floating-point numbers turn into store/load pairs. Even with direct moves,
15748 // just loading the two floating-point numbers is likely better.
15749 auto ReplaceTwoFloatLoad = [&]() {
15750 if (VT != MVT::i64)
15751 return false;
15752
15753 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15754 LD->isVolatile())
15755 return false;
15756
15757 // We're looking for a sequence like this:
15758 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15759 // t16: i64 = srl t13, Constant:i32<32>
15760 // t17: i32 = truncate t16
15761 // t18: f32 = bitcast t17
15762 // t19: i32 = truncate t13
15763 // t20: f32 = bitcast t19
15764
15765 if (!LD->hasNUsesOfValue(2, 0))
15766 return false;
15767
15768 auto UI = LD->use_begin();
15769 while (UI.getUse().getResNo() != 0) ++UI;
15770 SDNode *Trunc = *UI++;
15771 while (UI.getUse().getResNo() != 0) ++UI;
15772 SDNode *RightShift = *UI;
15773 if (Trunc->getOpcode() != ISD::TRUNCATE)
15774 std::swap(Trunc, RightShift);
15775
15776 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15777 Trunc->getValueType(0) != MVT::i32 ||
15778 !Trunc->hasOneUse())
15779 return false;
15780 if (RightShift->getOpcode() != ISD::SRL ||
15781 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15782 RightShift->getConstantOperandVal(1) != 32 ||
15783 !RightShift->hasOneUse())
15784 return false;
15785
15786 SDNode *Trunc2 = *RightShift->use_begin();
15787 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15788 Trunc2->getValueType(0) != MVT::i32 ||
15789 !Trunc2->hasOneUse())
15790 return false;
15791
15792 SDNode *Bitcast = *Trunc->use_begin();
15793 SDNode *Bitcast2 = *Trunc2->use_begin();
15794
15795 if (Bitcast->getOpcode() != ISD::BITCAST ||
15796 Bitcast->getValueType(0) != MVT::f32)
15797 return false;
15798 if (Bitcast2->getOpcode() != ISD::BITCAST ||
15799 Bitcast2->getValueType(0) != MVT::f32)
15800 return false;
15801
15802 if (Subtarget.isLittleEndian())
15803 std::swap(Bitcast, Bitcast2);
15804
15805 // Bitcast has the second float (in memory-layout order) and Bitcast2
15806 // has the first one.
15807
15808 SDValue BasePtr = LD->getBasePtr();
15809 if (LD->isIndexed()) {
15810 assert(LD->getAddressingMode() == ISD::PRE_INC &&
15811 "Non-pre-inc AM on PPC?");
15812 BasePtr =
15813 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15814 LD->getOffset());
15815 }
15816
15817 auto MMOFlags =
15818 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15819 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15820 LD->getPointerInfo(), LD->getAlign(),
15821 MMOFlags, LD->getAAInfo());
15822 SDValue AddPtr =
15823 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15824 BasePtr, DAG.getIntPtrConstant(4, dl));
15825 SDValue FloatLoad2 = DAG.getLoad(
15826 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15827 LD->getPointerInfo().getWithOffset(4),
15828 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
15829
15830 if (LD->isIndexed()) {
15831 // Note that DAGCombine should re-form any pre-increment load(s) from
15832 // what is produced here if that makes sense.
15833 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15834 }
15835
15836 DCI.CombineTo(Bitcast2, FloatLoad);
15837 DCI.CombineTo(Bitcast, FloatLoad2);
15838
15839 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15840 SDValue(FloatLoad2.getNode(), 1));
15841 return true;
15842 };
15843
15844 if (ReplaceTwoFloatLoad())
15845 return SDValue(N, 0);
15846
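
A hedged example of source that tends to produce the DAG sketched in the comment above (exact codegen varies with target features):

    #include <complex>
    float SumParts(const std::complex<float> &C) {
      std::complex<float> T = C; // copied as a single i64 load
      // The halves come back via srl/truncate/bitcast, which
      // ReplaceTwoFloatLoad rewrites into two direct f32 loads.
      return T.real() + T.imag();
    }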
15847 EVT MemVT = LD->getMemoryVT();
15848 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15849 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15850 if (LD->isUnindexed() && VT.isVector() &&
15851 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15852 // P8 and later hardware should just use LOAD.
15853 !Subtarget.hasP8Vector() &&
15854 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15855 VT == MVT::v4f32))) &&
15856 LD->getAlign() < ABIAlignment) {
15857 // This is a type-legal unaligned Altivec load.
15858 SDValue Chain = LD->getChain();
15859 SDValue Ptr = LD->getBasePtr();
15860 bool isLittleEndian = Subtarget.isLittleEndian();
15861
15862 // This implements the loading of unaligned vectors as described in
15863 // the venerable Apple Velocity Engine overview. Specifically:
15864 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15865 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15866 //
15867 // The general idea is to expand a sequence of one or more unaligned
15868 // loads into an alignment-based permutation-control instruction (lvsl
15869 // or lvsr), a series of regular vector loads (which always truncate
15870 // their input address to an aligned address), and a series of
15871 // permutations. The results of these permutations are the requested
15872 // loaded values. The trick is that the last "extra" load is not taken
15873 // from the address you might suspect (sizeof(vector) bytes after the
15874 // last requested load), but rather sizeof(vector) - 1 bytes after the
15875 // last requested vector. The point of this is to avoid a page fault if
15876 // the base address happened to be aligned. This works because if the
15877 // base address is aligned, then adding less than a full vector length
15878 // will cause the last vector in the sequence to be (re)loaded.
15879 // Otherwise, the next vector will be fetched as you might suspect was
15880 // necessary.
15881
15882 // We might be able to reuse the permutation generation from
15883 // a different base address offset from this one by an aligned amount.
15884 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15885 // optimization later.
15886 Intrinsic::ID Intr, IntrLD, IntrPerm;
15887 MVT PermCntlTy, PermTy, LDTy;
15888 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15889 : Intrinsic::ppc_altivec_lvsl;
15890 IntrLD = Intrinsic::ppc_altivec_lvx;
15891 IntrPerm = Intrinsic::ppc_altivec_vperm;
15892 PermCntlTy = MVT::v16i8;
15893 PermTy = MVT::v4i32;
15894 LDTy = MVT::v4i32;
15895
15896 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15897
15898 // Create the new MMO for the new base load. It is like the original MMO,
15899 // but represents an area in memory almost twice the vector size centered
15900 // on the original address. If the address is unaligned, we might start
15901 // reading up to (sizeof(vector)-1) bytes below the address of the
15902 // original unaligned load.
15903 MachineFunction &MF = DAG.getMachineFunction();
15904 MachineMemOperand *BaseMMO =
15905 MF.getMachineMemOperand(LD->getMemOperand(),
15906 -(int64_t)MemVT.getStoreSize()+1,
15907 2*MemVT.getStoreSize()-1);
15908
15909 // Create the new base load.
15910 SDValue LDXIntID =
15911 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15912 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15913 SDValue BaseLoad =
15914 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15915 DAG.getVTList(PermTy, MVT::Other),
15916 BaseLoadOps, LDTy, BaseMMO);
15917
15918 // Note that the value of IncOffset (which is provided to the next
15919 // load's pointer info offset value, and thus used to calculate the
15920 // alignment), and the value of IncValue (which is actually used to
15921 // increment the pointer value) are different! This is because we
15922 // require the next load to appear to be aligned, even though it
15923 // is actually offset from the base pointer by a lesser amount.
15924 int IncOffset = VT.getSizeInBits() / 8;
15925 int IncValue = IncOffset;
15926
15927 // Walk (both up and down) the chain looking for another load at the real
15928 // (aligned) offset (the alignment of the other load does not matter in
15929 // this case). If found, then do not use the offset reduction trick, as
15930 // that will prevent the loads from being later combined (as they would
15931 // otherwise be duplicates).
15932 if (!findConsecutiveLoad(LD, DAG))
15933 --IncValue;
15934
15935 SDValue Increment =
15936 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15937 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15938
15939 MachineMemOperand *ExtraMMO =
15940 MF.getMachineMemOperand(LD->getMemOperand(),
15941 1, 2*MemVT.getStoreSize()-1);
15942 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15943 SDValue ExtraLoad =
15944 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15945 DAG.getVTList(PermTy, MVT::Other),
15946 ExtraLoadOps, LDTy, ExtraMMO);
15947
15948 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15949 BaseLoad.getValue(1), ExtraLoad.getValue(1));
15950
15951 // Because vperm has a big-endian bias, we must reverse the order
15952 // of the input vectors and complement the permute control vector
15953 // when generating little endian code. We have already handled the
15954 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15955 // and ExtraLoad here.
15956 SDValue Perm;
15957 if (isLittleEndian)
15958 Perm = BuildIntrinsicOp(IntrPerm,
15959 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15960 else
15961 Perm = BuildIntrinsicOp(IntrPerm,
15962 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15963
15964 if (VT != PermTy)
15965 Perm = Subtarget.hasAltivec()
15966 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15967 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15968 DAG.getTargetConstant(1, dl, MVT::i64));
15969 // second argument is 1 because this rounding
15970 // is always exact.
15971
15972 // The output of the permutation is our loaded result, the TokenFactor is
15973 // our new chain.
15974 DCI.CombineTo(N, Perm, TF);
15975 return SDValue(N, 0);
15976 }
15977 }
15978 break;
15979 case ISD::INTRINSIC_WO_CHAIN: {
15980 bool isLittleEndian = Subtarget.isLittleEndian();
15981 unsigned IID = N->getConstantOperandVal(0);
15982 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15983 : Intrinsic::ppc_altivec_lvsl);
15984 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15985 SDValue Add = N->getOperand(1);
15986
15987 int Bits = 4 /* 16 byte alignment */;
15988
15989 if (DAG.MaskedValueIsZero(Add->getOperand(1),
15990 APInt::getAllOnes(Bits /* alignment */)
15991 .zext(Add.getScalarValueSizeInBits()))) {
15992 SDNode *BasePtr = Add->getOperand(0).getNode();
15993 for (SDNode *U : BasePtr->uses()) {
15994 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15995 U->getConstantOperandVal(0) == IID) {
15996 // We've found another LVSL/LVSR, and this address is an aligned
15997 // multiple of that one. The results will be the same, so use the
15998 // one we've just found instead.
15999
16000 return SDValue(U, 0);
16001 }
16002 }
16003 }
16004
16005 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16006 SDNode *BasePtr = Add->getOperand(0).getNode();
16007 for (SDNode *U : BasePtr->uses()) {
16008 if (U->getOpcode() == ISD::ADD &&
16009 isa<ConstantSDNode>(U->getOperand(1)) &&
16010 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16011 (1ULL << Bits) ==
16012 0) {
16013 SDNode *OtherAdd = U;
16014 for (SDNode *V : OtherAdd->uses()) {
16015 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16016 V->getConstantOperandVal(0) == IID) {
16017 return SDValue(V, 0);
16018 }
16019 }
16020 }
16021 }
16022 }
16023 }
16024
16025 // Combine vmaxsw/h/b(a, a's negation) into abs(a)
16026 // to expose the vabsduw/h/b opportunity downstream.
16027 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16028 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16029 IID == Intrinsic::ppc_altivec_vmaxsh ||
16030 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16031 SDValue V1 = N->getOperand(1);
16032 SDValue V2 = N->getOperand(2);
16033 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16034 V1.getSimpleValueType() == MVT::v8i16 ||
16035 V1.getSimpleValueType() == MVT::v16i8) &&
16036 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16037 // (0-a, a)
16038 if (V1.getOpcode() == ISD::SUB &&
16039 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16040 V1.getOperand(1) == V2) {
16041 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16042 }
16043 // (a, 0-a)
16044 if (V2.getOpcode() == ISD::SUB &&
16045 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16046 V2.getOperand(1) == V1) {
16047 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16048 }
16049 // (x-y, y-x)
16050 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16051 V1.getOperand(0) == V2.getOperand(1) &&
16052 V1.getOperand(1) == V2.getOperand(0)) {
16053 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16054 }
16055 }
16056 }
16057 }
16058
16059 break;
16060 case ISD::INTRINSIC_W_CHAIN:
16061 switch (N->getConstantOperandVal(1)) {
16062 default:
16063 break;
16064 case Intrinsic::ppc_altivec_vsum4sbs:
16065 case Intrinsic::ppc_altivec_vsum4shs:
16066 case Intrinsic::ppc_altivec_vsum4ubs: {
16067 // These sum-across intrinsics only have a chain due to the side effect
16068 // that they may set the SAT bit. If we know the SAT bit will not be set
16069 // for some inputs, we can replace any uses of their chain with the
16070 // input chain.
16071 if (BuildVectorSDNode *BVN =
16072 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16073 APInt APSplatBits, APSplatUndef;
16074 unsigned SplatBitSize;
16075 bool HasAnyUndefs;
16076 bool BVNIsConstantSplat = BVN->isConstantSplat(
16077 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16078 !Subtarget.isLittleEndian());
16079 // If the constant splat vector is 0, the SAT bit will not be set.
16080 if (BVNIsConstantSplat && APSplatBits == 0)
16081 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16082 }
16083 return SDValue();
16084 }
16085 case Intrinsic::ppc_vsx_lxvw4x:
16086 case Intrinsic::ppc_vsx_lxvd2x:
16087 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16088 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16089 if (Subtarget.needsSwapsForVSXMemOps())
16090 return expandVSXLoadForLE(N, DCI);
16091 break;
16092 }
16093 break;
16094 case ISD::INTRINSIC_VOID:
16095 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16096 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16097 if (Subtarget.needsSwapsForVSXMemOps()) {
16098 switch (N->getConstantOperandVal(1)) {
16099 default:
16100 break;
16101 case Intrinsic::ppc_vsx_stxvw4x:
16102 case Intrinsic::ppc_vsx_stxvd2x:
16103 return expandVSXStoreForLE(N, DCI);
16104 }
16105 }
16106 break;
16107 case ISD::BSWAP: {
16108 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16109 // For subtargets without LDBRX, we can still do better than the default
16110 // expansion even for 64-bit BSWAP (LOAD).
16111 bool Is64BitBswapOn64BitTgt =
16112 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16113 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16114 N->getOperand(0).hasOneUse();
16115 if (IsSingleUseNormalLd &&
16116 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16117 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16118 SDValue Load = N->getOperand(0);
16119 LoadSDNode *LD = cast<LoadSDNode>(Load);
16120 // Create the byte-swapping load.
16121 SDValue Ops[] = {
16122 LD->getChain(), // Chain
16123 LD->getBasePtr(), // Ptr
16124 DAG.getValueType(N->getValueType(0)) // VT
16125 };
16126 SDValue BSLoad =
16127 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16128 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16129 MVT::i64 : MVT::i32, MVT::Other),
16130 Ops, LD->getMemoryVT(), LD->getMemOperand());
16131
16132 // If this is an i16 load, insert the truncate.
16133 SDValue ResVal = BSLoad;
16134 if (N->getValueType(0) == MVT::i16)
16135 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16136
16137 // First, combine the bswap away. This makes the value produced by the
16138 // load dead.
16139 DCI.CombineTo(N, ResVal);
16140
16141 // Next, combine the load away; we give it a bogus result value but a real
16142 // chain result. The result value is dead because the bswap is dead.
16143 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16144
16145 // Return N so it doesn't get rechecked!
16146 return SDValue(N, 0);
16147 }
16148 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16149 // before legalization so that the BUILD_PAIR is handled correctly.
16150 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16151 !IsSingleUseNormalLd)
16152 return SDValue();
16153 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16154
16155 // Can't split volatile or atomic loads.
16156 if (!LD->isSimple())
16157 return SDValue();
16158 SDValue BasePtr = LD->getBasePtr();
16159 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16160 LD->getPointerInfo(), LD->getAlign());
16161 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16162 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16163 DAG.getIntPtrConstant(4, dl));
16164 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16165 LD->getMemOperand(), 4, 4);
16166 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16167 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16168 SDValue Res;
16169 if (Subtarget.isLittleEndian())
16170 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16171 else
16172 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16173 SDValue TF =
16174 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16175 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16176 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16177 return Res;
16178 }
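
Illustrative only: a byte-reversed word load that this case folds into a single lwbrx (for i64 without LDBRX, the split above yields two lwbrx loads plus a BUILD_PAIR):

    #include <cstdint>
    uint32_t LoadOtherEndian(const uint32_t *P) {
      return __builtin_bswap32(*P);
    }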
16179 case PPCISD::VCMP:
16180 // If a VCMP_rec node already exists with exactly the same operands as this
16181 // node, use its result instead of this node (VCMP_rec computes both a CR6
16182 // and a normal output).
16183 //
16184 if (!N->getOperand(0).hasOneUse() &&
16185 !N->getOperand(1).hasOneUse() &&
16186 !N->getOperand(2).hasOneUse()) {
16187
16188 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16189 SDNode *VCMPrecNode = nullptr;
16190
16191 SDNode *LHSN = N->getOperand(0).getNode();
16192 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16193 UI != E; ++UI)
16194 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16195 UI->getOperand(1) == N->getOperand(1) &&
16196 UI->getOperand(2) == N->getOperand(2) &&
16197 UI->getOperand(0) == N->getOperand(0)) {
16198 VCMPrecNode = *UI;
16199 break;
16200 }
16201
16202 // If there is no VCMP_rec node, or if the flag value has a single use,
16203 // don't transform this.
16204 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16205 break;
16206
16207 // Look at the (necessarily single) use of the flag value. If it has a
16208 // chain, this transformation is more complex. Note that multiple things
16209 // could use the value result, which we should ignore.
16210 SDNode *FlagUser = nullptr;
16211 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16212 FlagUser == nullptr; ++UI) {
16213 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16214 SDNode *User = *UI;
16215 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16216 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16217 FlagUser = User;
16218 break;
16219 }
16220 }
16221 }
16222
16223 // If the user is a MFOCRF instruction, we know this is safe.
16224 // Otherwise we give up for right now.
16225 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16226 return SDValue(VCMPrecNode, 0);
16227 }
16228 break;
16229 case ISD::BR_CC: {
16230 // If this is a branch on an altivec predicate comparison, lower this so
16231 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16232 // lowering is done pre-legalize, because the legalizer lowers the predicate
16233 // compare down to code that is difficult to reassemble.
16234 // This code also handles branches that depend on the result of a store
16235 // conditional.
16236 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16237 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16238
16239 int CompareOpc;
16240 bool isDot;
16241
16242 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16243 break;
16244
16245 // Since we are doing this pre-legalize, the RHS can be a constant of
16246 // arbitrary bitwidth which may cause issues when trying to get the value
16247 // from the underlying APInt.
16248 auto RHSAPInt = RHS->getAsAPIntVal();
16249 if (!RHSAPInt.isIntN(64))
16250 break;
16251
16252 unsigned Val = RHSAPInt.getZExtValue();
16253 auto isImpossibleCompare = [&]() {
16254 // If this is a comparison against something other than 0/1, then we know
16255 // that the condition is never/always true.
16256 if (Val != 0 && Val != 1) {
16257 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16258 return N->getOperand(0);
16259 // Always !=, turn it into an unconditional branch.
16260 return DAG.getNode(ISD::BR, dl, MVT::Other,
16261 N->getOperand(0), N->getOperand(4));
16262 }
16263 return SDValue();
16264 };
16265 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16266 unsigned StoreWidth = 0;
16267 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16268 isStoreConditional(LHS, StoreWidth)) {
16269 if (SDValue Impossible = isImpossibleCompare())
16270 return Impossible;
16271 PPC::Predicate CompOpc;
16272 // eq 0 => ne
16273 // ne 0 => eq
16274 // eq 1 => eq
16275 // ne 1 => ne
16276 if (Val == 0)
16277 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16278 else
16279 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16280
16281 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16282 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16283 auto *MemNode = cast<MemSDNode>(LHS);
16284 SDValue ConstSt = DAG.getMemIntrinsicNode(
16285 PPCISD::STORE_COND, dl,
16286 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16287 MemNode->getMemoryVT(), MemNode->getMemOperand());
16288
16289 SDValue InChain;
16290 // Unchain the branch from the original store conditional.
16291 if (N->getOperand(0) == LHS.getValue(1))
16292 InChain = LHS.getOperand(0);
16293 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16294 SmallVector<SDValue, 4> InChains;
16295 SDValue InTF = N->getOperand(0);
16296 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16297 if (InTF.getOperand(i) != LHS.getValue(1))
16298 InChains.push_back(InTF.getOperand(i));
16299 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16300 }
16301
16302 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16303 DAG.getConstant(CompOpc, dl, MVT::i32),
16304 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16305 ConstSt.getValue(2));
16306 }
16307
16308 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16309 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16310 assert(isDot && "Can't compare against a vector result!");
16311
16312 if (SDValue Impossible = isImpossibleCompare())
16313 return Impossible;
16314
16315 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16316 // Create the PPCISD altivec 'dot' comparison node.
16317 SDValue Ops[] = {
16318 LHS.getOperand(2), // LHS of compare
16319 LHS.getOperand(3), // RHS of compare
16320 DAG.getConstant(CompareOpc, dl, MVT::i32)
16321 };
16322 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16323 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16324
16325 // Unpack the result based on how the target uses it.
16326 PPC::Predicate CompOpc;
16327 switch (LHS.getConstantOperandVal(1)) {
16328 default: // Can't happen, don't crash on invalid number though.
16329 case 0: // Branch on the value of the EQ bit of CR6.
16330 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16331 break;
16332 case 1: // Branch on the inverted value of the EQ bit of CR6.
16333 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16334 break;
16335 case 2: // Branch on the value of the LT bit of CR6.
16336 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16337 break;
16338 case 3: // Branch on the inverted value of the LT bit of CR6.
16339 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16340 break;
16341 }
16342
16343 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16344 DAG.getConstant(CompOpc, dl, MVT::i32),
16345 DAG.getRegister(PPC::CR6, MVT::i32),
16346 N->getOperand(4), CompNode.getValue(1));
16347 }
16348 break;
16349 }
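
A sketch of the source shape this case targets: an AltiVec all/any predicate compiles to a dot-form compare, and the combine branches on CR6 directly instead of copying it out with MFOCRF:

    #include <altivec.h>
    int AllEqual(vector int A, vector int B) {
      return vec_all_eq(A, B) ? 1 : 2; // vcmpequw. + branch on CR6[EQ]
    }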
16350 case ISD::BUILD_VECTOR:
16351 return DAGCombineBuildVector(N, DCI);
16352 }
16353
16354 return SDValue();
16355}
16356
16357SDValue
16358PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16359 SelectionDAG &DAG,
16360 SmallVectorImpl<SDNode *> &Created) const {
16361 // fold (sdiv X, pow2)
16362 EVT VT = N->getValueType(0);
16363 if (VT == MVT::i64 && !Subtarget.isPPC64())
16364 return SDValue();
16365 if ((VT != MVT::i32 && VT != MVT::i64) ||
16366 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16367 return SDValue();
16368
16369 SDLoc DL(N);
16370 SDValue N0 = N->getOperand(0);
16371
16372 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16373 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16374 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16375
16376 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16377 Created.push_back(Op.getNode());
16378
16379 if (IsNegPow2) {
16380 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16381 Created.push_back(Op.getNode());
16382 }
16383
16384 return Op;
16385}
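
A worked example of the transform, assuming the usual expansion of PPCISD::SRA_ADDZE: srawi shifts right while recording in CA whether a negative value lost nonzero bits, and addze adds that carry back to round toward zero:

    int DivideBy8(int X) {
      return X / 8; // srawi R, X, 3 ; addze R, R
    }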
16386
16387//===----------------------------------------------------------------------===//
16388// Inline Assembly Support
16389//===----------------------------------------------------------------------===//
16390
16391void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16392 KnownBits &Known,
16393 const APInt &DemandedElts,
16394 const SelectionDAG &DAG,
16395 unsigned Depth) const {
16396 Known.resetAll();
16397 switch (Op.getOpcode()) {
16398 default: break;
16399 case PPCISD::LBRX: {
16400 // lhbrx is known to have the top bits cleared out.
16401 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16402 Known.Zero = 0xFFFF0000;
16403 break;
16404 }
16405 case ISD::INTRINSIC_WO_CHAIN: {
16406 switch (Op.getConstantOperandVal(0)) {
16407 default: break;
16408 case Intrinsic::ppc_altivec_vcmpbfp_p:
16409 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16410 case Intrinsic::ppc_altivec_vcmpequb_p:
16411 case Intrinsic::ppc_altivec_vcmpequh_p:
16412 case Intrinsic::ppc_altivec_vcmpequw_p:
16413 case Intrinsic::ppc_altivec_vcmpequd_p:
16414 case Intrinsic::ppc_altivec_vcmpequq_p:
16415 case Intrinsic::ppc_altivec_vcmpgefp_p:
16416 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16417 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16418 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16419 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16420 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16421 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16422 case Intrinsic::ppc_altivec_vcmpgtub_p:
16423 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16424 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16425 case Intrinsic::ppc_altivec_vcmpgtud_p:
16426 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16427 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16428 break;
16429 }
16430 break;
16431 }
16432 case ISD::INTRINSIC_W_CHAIN: {
16433 switch (Op.getConstantOperandVal(1)) {
16434 default:
16435 break;
16436 case Intrinsic::ppc_load2r:
16437 // Top bits are cleared for load2r (which is the same as lhbrx).
16438 Known.Zero = 0xFFFF0000;
16439 break;
16440 }
16441 break;
16442 }
16443 }
16444}
16445
16446Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16447 switch (Subtarget.getCPUDirective()) {
16448 default: break;
16449 case PPC::DIR_970:
16450 case PPC::DIR_PWR4:
16451 case PPC::DIR_PWR5:
16452 case PPC::DIR_PWR5X:
16453 case PPC::DIR_PWR6:
16454 case PPC::DIR_PWR6X:
16455 case PPC::DIR_PWR7:
16456 case PPC::DIR_PWR8:
16457 case PPC::DIR_PWR9:
16458 case PPC::DIR_PWR10:
16459 case PPC::DIR_PWR_FUTURE: {
16460 if (!ML)
16461 break;
16462
16463 if (!DisableInnermostLoopAlign32) {
16464 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
16465 // so that we can decrease cache misses and branch-prediction misses.
16466 // Actual alignment of the loop will depend on the hotness check and other
16467 // logic in alignBlocks.
16468 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16469 return Align(32);
16470 }
16471
16472 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16473
16474 // For small loops (between 5 and 8 instructions), align to a 32-byte
16475 // boundary so that the entire loop fits in one instruction-cache line.
16476 uint64_t LoopSize = 0;
16477 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16478 for (const MachineInstr &J : **I) {
16479 LoopSize += TII->getInstSizeInBytes(J);
16480 if (LoopSize > 32)
16481 break;
16482 }
16483
16484 if (LoopSize > 16 && LoopSize <= 32)
16485 return Align(32);
16486
16487 break;
16488 }
16489 }
16490
16491 return TargetLowering::getPrefLoopAlignment(ML);
16492}
16493
16494/// getConstraintType - Given a constraint, return the type of
16495/// constraint it is for this target.
16496PPCTargetLowering::ConstraintType
16497PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16498 if (Constraint.size() == 1) {
16499 switch (Constraint[0]) {
16500 default: break;
16501 case 'b':
16502 case 'r':
16503 case 'f':
16504 case 'd':
16505 case 'v':
16506 case 'y':
16507 return C_RegisterClass;
16508 case 'Z':
16509 // FIXME: While Z does indicate a memory constraint, it specifically
16510 // indicates an r+r address (used in conjunction with the 'y' modifier
16511 // in the replacement string). Currently, we're forcing the base
16512 // register to be r0 in the asm printer (which is interpreted as zero)
16513 // and forming the complete address in the second register. This is
16514 // suboptimal.
16515 return C_Memory;
16516 }
16517 } else if (Constraint == "wc") { // individual CR bits.
16518 return C_RegisterClass;
16519 } else if (Constraint == "wa" || Constraint == "wd" ||
16520 Constraint == "wf" || Constraint == "ws" ||
16521 Constraint == "wi" || Constraint == "ww") {
16522 return C_RegisterClass; // VSX registers.
16523 }
16524 return TargetLowering::getConstraintType(Constraint);
16525}
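
Illustrative use of the 'Z' constraint described above, paired with the '%y' operand modifier so the asm printer emits the indexed (r+r) address form:

    void StoreByteReversed(unsigned *P, unsigned V) {
      __asm__("stwbrx %1, %y0" : "=Z"(*P) : "r"(V));
    }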
16526
16527/// Examine constraint type and operand type and determine a weight value.
16528/// This object must already have been set up with the operand type
16529/// and the current alternative constraint selected.
16530TargetLowering::ConstraintWeight
16531PPCTargetLowering::getSingleConstraintMatchWeight(
16532 AsmOperandInfo &info, const char *constraint) const {
16533 ConstraintWeight weight = CW_Invalid;
16534 Value *CallOperandVal = info.CallOperandVal;
16535 // If we don't have a value, we can't do a match,
16536 // but allow it at the lowest weight.
16537 if (!CallOperandVal)
16538 return CW_Default;
16539 Type *type = CallOperandVal->getType();
16540
16541 // Look at the constraint type.
16542 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16543 return CW_Register; // an individual CR bit.
16544 else if ((StringRef(constraint) == "wa" ||
16545 StringRef(constraint) == "wd" ||
16546 StringRef(constraint) == "wf") &&
16547 type->isVectorTy())
16548 return CW_Register;
16549 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16550 return CW_Register; // holds 64-bit integer data.
16551 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16552 return CW_Register;
16553 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16554 return CW_Register;
16555
16556 switch (*constraint) {
16557 default:
16558 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16559 break;
16560 case 'b':
16561 if (type->isIntegerTy())
16562 weight = CW_Register;
16563 break;
16564 case 'f':
16565 if (type->isFloatTy())
16566 weight = CW_Register;
16567 break;
16568 case 'd':
16569 if (type->isDoubleTy())
16570 weight = CW_Register;
16571 break;
16572 case 'v':
16573 if (type->isVectorTy())
16574 weight = CW_Register;
16575 break;
16576 case 'y':
16577 weight = CW_Register;
16578 break;
16579 case 'Z':
16580 weight = CW_Memory;
16581 break;
16582 }
16583 return weight;
16584}
16585
16586std::pair<unsigned, const TargetRegisterClass *>
16587PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16588 StringRef Constraint,
16589 MVT VT) const {
16590 if (Constraint.size() == 1) {
16591 // GCC RS6000 Constraint Letters
16592 switch (Constraint[0]) {
16593 case 'b': // R1-R31
16594 if (VT == MVT::i64 && Subtarget.isPPC64())
16595 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16596 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16597 case 'r': // R0-R31
16598 if (VT == MVT::i64 && Subtarget.isPPC64())
16599 return std::make_pair(0U, &PPC::G8RCRegClass);
16600 return std::make_pair(0U, &PPC::GPRCRegClass);
16601 // 'd' and 'f' constraints are both defined to be "the floating point
16602 // registers", where one is for 32-bit and the other for 64-bit. We don't
16603 // care much here, so just give them all the same register classes.
16604 case 'd':
16605 case 'f':
16606 if (Subtarget.hasSPE()) {
16607 if (VT == MVT::f32 || VT == MVT::i32)
16608 return std::make_pair(0U, &PPC::GPRCRegClass);
16609 if (VT == MVT::f64 || VT == MVT::i64)
16610 return std::make_pair(0U, &PPC::SPERCRegClass);
16611 } else {
16612 if (VT == MVT::f32 || VT == MVT::i32)
16613 return std::make_pair(0U, &PPC::F4RCRegClass);
16614 if (VT == MVT::f64 || VT == MVT::i64)
16615 return std::make_pair(0U, &PPC::F8RCRegClass);
16616 }
16617 break;
16618 case 'v':
16619 if (Subtarget.hasAltivec() && VT.isVector())
16620 return std::make_pair(0U, &PPC::VRRCRegClass);
16621 else if (Subtarget.hasVSX())
16622 // Scalars in Altivec registers only make sense with VSX.
16623 return std::make_pair(0U, &PPC::VFRCRegClass);
16624 break;
16625 case 'y': // crrc
16626 return std::make_pair(0U, &PPC::CRRCRegClass);
16627 }
16628 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16629 // An individual CR bit.
16630 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16631 } else if ((Constraint == "wa" || Constraint == "wd" ||
16632 Constraint == "wf" || Constraint == "wi") &&
16633 Subtarget.hasVSX()) {
16634 // A VSX register for either a scalar (FP) or vector. There is no
16635 // support for single precision scalars on subtargets prior to Power8.
16636 if (VT.isVector())
16637 return std::make_pair(0U, &PPC::VSRCRegClass);
16638 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16639 return std::make_pair(0U, &PPC::VSSRCRegClass);
16640 return std::make_pair(0U, &PPC::VSFRCRegClass);
16641 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16642 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16643 return std::make_pair(0U, &PPC::VSSRCRegClass);
16644 else
16645 return std::make_pair(0U, &PPC::VSFRCRegClass);
16646 } else if (Constraint == "lr") {
16647 if (VT == MVT::i64)
16648 return std::make_pair(0U, &PPC::LR8RCRegClass);
16649 else
16650 return std::make_pair(0U, &PPC::LRRCRegClass);
16651 }
16652
16653 // Handle special cases of physical registers that are not properly handled
16654 // by the base class.
16655 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16656 // If we name a VSX register, we can't defer to the base class because it
16657 // will not recognize the correct register (their names will be VSL{0-31}
16658 // and V{0-31} so they won't match). So we match them here.
16659 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16660 int VSNum = atoi(Constraint.data() + 3);
16661 assert(VSNum >= 0 && VSNum <= 63 &&
16662 "Attempted to access a vsr out of range");
16663 if (VSNum < 32)
16664 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16665 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16666 }
16667
16668 // For float registers, we can't defer to the base class as it will match
16669 // the SPILLTOVSRRC class.
16670 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16671 int RegNum = atoi(Constraint.data() + 2);
16672 if (RegNum > 31 || RegNum < 0)
16673 report_fatal_error("Invalid floating point register number");
16674 if (VT == MVT::f32 || VT == MVT::i32)
16675 return Subtarget.hasSPE()
16676 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16677 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16678 if (VT == MVT::f64 || VT == MVT::i64)
16679 return Subtarget.hasSPE()
16680 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16681 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16682 }
16683 }
16684
16685 std::pair<unsigned, const TargetRegisterClass *> R =
16686 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16687
16688 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16689 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16690 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16691 // register.
16692 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16693 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16694 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16695 PPC::GPRCRegClass.contains(R.first))
16696 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16697 PPC::sub_32, &PPC::G8RCRegClass),
16698 &PPC::G8RCRegClass);
16699
16700 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16701 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16702 R.first = PPC::CR0;
16703 R.second = &PPC::CRRCRegClass;
16704 }
16705 // FIXME: This warning should ideally be emitted in the front end.
16706 const auto &TM = getTargetMachine();
16707 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16708 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16709 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16710 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16711 errs() << "warning: vector registers 20 to 32 are reserved in the "
16712 "default AIX AltiVec ABI and cannot be used\n";
16713 }
16714
16715 return R;
16716}
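
A hedged sketch of the 'wa' VSX constraint resolved above; the '%x' modifier prints the full VSX register number:

    #include <altivec.h>
    vector double NegateVSX(vector double V) {
      vector double R;
      __asm__("xvnegdp %x0, %x1" : "=wa"(R) : "wa"(V));
      return R;
    }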
16717
16718/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16719/// vector. If it is invalid, don't add anything to Ops.
16720void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16721 StringRef Constraint,
16722 std::vector<SDValue> &Ops,
16723 SelectionDAG &DAG) const {
16724 SDValue Result;
16725
16726 // Only support length 1 constraints.
16727 if (Constraint.size() > 1)
16728 return;
16729
16730 char Letter = Constraint[0];
16731 switch (Letter) {
16732 default: break;
16733 case 'I':
16734 case 'J':
16735 case 'K':
16736 case 'L':
16737 case 'M':
16738 case 'N':
16739 case 'O':
16740 case 'P': {
16741 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16742 if (!CST) return; // Must be an immediate to match.
16743 SDLoc dl(Op);
16744 int64_t Value = CST->getSExtValue();
16745 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16746 // numbers are printed as such.
16747 switch (Letter) {
16748 default: llvm_unreachable("Unknown constraint letter!");
16749 case 'I': // "I" is a signed 16-bit constant.
16750 if (isInt<16>(Value))
16751 Result = DAG.getTargetConstant(Value, dl, TCVT);
16752 break;
16753 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16754 if (isShiftedUInt<16, 16>(Value))
16755 Result = DAG.getTargetConstant(Value, dl, TCVT);
16756 break;
16757 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16758 if (isShiftedInt<16, 16>(Value))
16759 Result = DAG.getTargetConstant(Value, dl, TCVT);
16760 break;
16761 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16762 if (isUInt<16>(Value))
16763 Result = DAG.getTargetConstant(Value, dl, TCVT);
16764 break;
16765 case 'M': // "M" is a constant that is greater than 31.
16766 if (Value > 31)
16767 Result = DAG.getTargetConstant(Value, dl, TCVT);
16768 break;
16769 case 'N': // "N" is a positive constant that is an exact power of two.
16770 if (Value > 0 && isPowerOf2_64(Value))
16771 Result = DAG.getTargetConstant(Value, dl, TCVT);
16772 break;
16773 case 'O': // "O" is the constant zero.
16774 if (Value == 0)
16775 Result = DAG.getTargetConstant(Value, dl, TCVT);
16776 break;
16777 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16778 if (isInt<16>(-Value))
16779 Result = DAG.getTargetConstant(Value, dl, TCVT);
16780 break;
16781 }
16782 break;
16783 }
16784 }
16785
16786 if (Result.getNode()) {
16787 Ops.push_back(Result);
16788 return;
16789 }
16790
16791 // Handle standard constraint letters.
16792 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16793}
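
Illustrative only: the 'I' constraint below accepts 42 because it fits a signed 16-bit field, so a real addi is emitted ('b' keeps the base register out of r0, which reads as zero):

    int AddImmediate(int A) {
      int R;
      __asm__("addi %0, %1, %2" : "=r"(R) : "b"(A), "I"(42));
      return R;
    }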
16794
16795void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
16796 SmallVectorImpl<SDValue> &Ops,
16797 SelectionDAG &DAG) const {
16798 if (I.getNumOperands() <= 1)
16799 return;
16800 if (!isa<ConstantSDNode>(Ops[1].getNode()))
16801 return;
16802 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
16803 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
16804 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
16805 return;
16806
16807 if (I.hasMetadata("annotation")) {
16808 MDNode *MDN = I.getMetadata("annotation");
16809 Ops.push_back(DAG.getMDNode(MDN));
16810 }
16811}
16812
16813// isLegalAddressingMode - Return true if the addressing mode represented
16814// by AM is legal for this target, for a load/store of the specified type.
16815bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
16816 const AddrMode &AM, Type *Ty,
16817 unsigned AS,
16818 Instruction *I) const {
16819 // Vector-type r+i form is supported since Power9 as DQ form. We don't check
16820 // the DQ-form offset requirement (off % 16 == 0) because, on PowerPC, the
16821 // imm form is preferred and the offset can be adjusted to use the imm form
16822 // later in the PPCLoopInstrFormPrep pass. Also, in LSR, one LSRUse uses its
16823 // min and max offsets to check for a legal addressing mode, so we should be
16824 // a little aggressive here to accommodate the other offsets in that LSRUse.
16825 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
16826 return false;
16827
16828 // PPC allows a sign-extended 16-bit immediate field.
16829 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
16830 return false;
16831
16832 // No global is ever allowed as a base.
16833 if (AM.BaseGV)
16834 return false;
16835
16836 // PPC only supports r+r addressing:
16837 switch (AM.Scale) {
16838 case 0: // "r+i" or just "i", depending on HasBaseReg.
16839 break;
16840 case 1:
16841 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
16842 return false;
16843 // Otherwise we have r+r or r+i.
16844 break;
16845 case 2:
16846 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
16847 return false;
16848 // Allow 2*r as r+r.
16849 break;
16850 default:
16851 // No other scales are supported.
16852 return false;
16853 }
16854
16855 return true;
16856}
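
Two hedged examples of the immediate-field rule enforced above: the first offset fits the signed 16-bit D-form displacement, the second must be materialized separately (addis or an r+r form):

    int LoadNear(int *P) { return P[1000]; }  // offset 4000: legal r+i
    int LoadFar(int *P)  { return P[10000]; } // offset 40000: out of range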
16857
16858SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
16859 SelectionDAG &DAG) const {
16860 MachineFunction &MF = DAG.getMachineFunction();
16861 MachineFrameInfo &MFI = MF.getFrameInfo();
16862 MFI.setReturnAddressIsTaken(true);
16863
16864 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
16865 return SDValue();
16866
16867 SDLoc dl(Op);
16868 unsigned Depth = Op.getConstantOperandVal(0);
16869
16870 // Make sure the function does not optimize away the store of the RA to
16871 // the stack.
16872 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
16873 FuncInfo->setLRStoreRequired();
16874 bool isPPC64 = Subtarget.isPPC64();
16875 auto PtrVT = getPointerTy(MF.getDataLayout());
16876
16877 if (Depth > 0) {
16878 // The link register (return address) is saved in the caller's frame
16879 // not the callee's stack frame. So we must get the caller's frame
16880 // address and load the return address at the LR offset from there.
16881 SDValue FrameAddr =
16882 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16883 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
16884 SDValue Offset =
16885 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
16886 isPPC64 ? MVT::i64 : MVT::i32);
16887 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
16888 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
16889 MachinePointerInfo());
16890 }
16891
16892 // Just load the return address off the stack.
16893 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
16894 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
16895 MachinePointerInfo());
16896}
16897
16898SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
16899 SelectionDAG &DAG) const {
16900 SDLoc dl(Op);
16901 unsigned Depth = Op.getConstantOperandVal(0);
16902
16903 MachineFunction &MF = DAG.getMachineFunction();
16904 MachineFrameInfo &MFI = MF.getFrameInfo();
16905 MFI.setFrameAddressIsTaken(true);
16906
16907 EVT PtrVT = getPointerTy(MF.getDataLayout());
16908 bool isPPC64 = PtrVT == MVT::i64;
16909
16910 // Naked functions never have a frame pointer, and so we use r1. For all
16911 // other functions, this decision must be delayed until during PEI.
16912 unsigned FrameReg;
16913 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16914 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16915 else
16916 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16917
16918 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16919 PtrVT);
16920 while (Depth--)
16921 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16922 FrameAddr, MachinePointerInfo());
16923 return FrameAddr;
16924}
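
For context, the usual entry points into these lowerings (a sketch; a nonzero depth walks the saved-frame chain with loads, exactly as in the loop above):

    void *CallerFrame()      { return __builtin_frame_address(1); }
    void *OwnReturnAddress() { return __builtin_return_address(0); }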
16925
16926// FIXME? Maybe this could be a TableGen attribute on some registers and
16927// this table could be generated automatically from RegInfo.
16928Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
16929 const MachineFunction &MF) const {
16930 bool isPPC64 = Subtarget.isPPC64();
16931
16932 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16933 if (!is64Bit && VT != LLT::scalar(32))
16934 report_fatal_error("Invalid register global variable type");
16935
16937 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16938 .Case("r2", isPPC64 ? Register() : PPC::R2)
16939 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16940 .Default(Register());
16941
16942 if (Reg)
16943 return Reg;
16944 report_fatal_error("Invalid register name global variable");
16945}
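
A hedged example of how these names typically arrive here, via a GCC-style global register variable (r13 is the thread pointer in the 64-bit ELF ABI):

    register unsigned long ThreadPointer __asm__("r13");
    unsigned long ReadThreadPointer() { return ThreadPointer; }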
16946
16947bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
16948 // The 32-bit SVR4 ABI accesses everything as got-indirect.
16949 if (Subtarget.is32BitELFABI())
16950 return true;
16951
16952 // AIX accesses everything indirectly through the TOC, which is similar to
16953 // the GOT.
16954 if (Subtarget.isAIXABI())
16955 return true;
16956
16957 CodeModel::Model CModel = getTargetMachine().getCodeModel();
16958 // Under the small or large code model, module locals are accessed
16959 // indirectly by loading their address from .toc/.got.
16960 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16961 return true;
16962
16963 // JumpTable and BlockAddress are accessed as got-indirect.
16964 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16965 return true;
16966
16967 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16968 return Subtarget.isGVIndirectSymbol(G->getGlobal());
16969
16970 return false;
16971}
16972
16973bool
16974PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
16975 // The PowerPC target isn't yet aware of offsets.
16976 return false;
16977}
16978
16979bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
16980 const CallInst &I,
16981 MachineFunction &MF,
16982 unsigned Intrinsic) const {
16983 switch (Intrinsic) {
16984 case Intrinsic::ppc_atomicrmw_xchg_i128:
16985 case Intrinsic::ppc_atomicrmw_add_i128:
16986 case Intrinsic::ppc_atomicrmw_sub_i128:
16987 case Intrinsic::ppc_atomicrmw_nand_i128:
16988 case Intrinsic::ppc_atomicrmw_and_i128:
16989 case Intrinsic::ppc_atomicrmw_or_i128:
16990 case Intrinsic::ppc_atomicrmw_xor_i128:
16991 case Intrinsic::ppc_cmpxchg_i128:
16992 Info.opc = ISD::INTRINSIC_W_CHAIN;
16993 Info.memVT = MVT::i128;
16994 Info.ptrVal = I.getArgOperand(0);
16995 Info.offset = 0;
16996 Info.align = Align(16);
16997 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
16998 MachineMemOperand::MOVolatile;
16999 return true;
17000 case Intrinsic::ppc_atomic_load_i128:
17001 Info.opc = ISD::INTRINSIC_W_CHAIN;
17002 Info.memVT = MVT::i128;
17003 Info.ptrVal = I.getArgOperand(0);
17004 Info.offset = 0;
17005 Info.align = Align(16);
17006 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17007 return true;
17008 case Intrinsic::ppc_atomic_store_i128:
17009 Info.opc = ISD::INTRINSIC_VOID;
17010 Info.memVT = MVT::i128;
17011 Info.ptrVal = I.getArgOperand(2);
17012 Info.offset = 0;
17013 Info.align = Align(16);
17014 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17015 return true;
17016 case Intrinsic::ppc_altivec_lvx:
17017 case Intrinsic::ppc_altivec_lvxl:
17018 case Intrinsic::ppc_altivec_lvebx:
17019 case Intrinsic::ppc_altivec_lvehx:
17020 case Intrinsic::ppc_altivec_lvewx:
17021 case Intrinsic::ppc_vsx_lxvd2x:
17022 case Intrinsic::ppc_vsx_lxvw4x:
17023 case Intrinsic::ppc_vsx_lxvd2x_be:
17024 case Intrinsic::ppc_vsx_lxvw4x_be:
17025 case Intrinsic::ppc_vsx_lxvl:
17026 case Intrinsic::ppc_vsx_lxvll: {
17027 EVT VT;
17028 switch (Intrinsic) {
17029 case Intrinsic::ppc_altivec_lvebx:
17030 VT = MVT::i8;
17031 break;
17032 case Intrinsic::ppc_altivec_lvehx:
17033 VT = MVT::i16;
17034 break;
17035 case Intrinsic::ppc_altivec_lvewx:
17036 VT = MVT::i32;
17037 break;
17038 case Intrinsic::ppc_vsx_lxvd2x:
17039 case Intrinsic::ppc_vsx_lxvd2x_be:
17040 VT = MVT::v2f64;
17041 break;
17042 default:
17043 VT = MVT::v4i32;
17044 break;
17045 }
17046
17047 Info.opc = ISD::INTRINSIC_W_CHAIN;
17048 Info.memVT = VT;
17049 Info.ptrVal = I.getArgOperand(0);
17050 Info.offset = -VT.getStoreSize()+1;
17051 Info.size = 2*VT.getStoreSize()-1;
17052 Info.align = Align(1);
17053 Info.flags = MachineMemOperand::MOLoad;
17054 return true;
17055 }
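// Note on the conservative range above (illustrative): lvx-class loads
// ignore the low bits of the effective address, so for a pointer p the
// bytes actually touched lie within [p - (size - 1), p + (size - 1)].
// For VT = v4i32 with getStoreSize() == 16, that is Info.offset = -15 and
// Info.size = 31.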
17056 case Intrinsic::ppc_altivec_stvx:
17057 case Intrinsic::ppc_altivec_stvxl:
17058 case Intrinsic::ppc_altivec_stvebx:
17059 case Intrinsic::ppc_altivec_stvehx:
17060 case Intrinsic::ppc_altivec_stvewx:
17061 case Intrinsic::ppc_vsx_stxvd2x:
17062 case Intrinsic::ppc_vsx_stxvw4x:
17063 case Intrinsic::ppc_vsx_stxvd2x_be:
17064 case Intrinsic::ppc_vsx_stxvw4x_be:
17065 case Intrinsic::ppc_vsx_stxvl:
17066 case Intrinsic::ppc_vsx_stxvll: {
17067 EVT VT;
17068 switch (Intrinsic) {
17069 case Intrinsic::ppc_altivec_stvebx:
17070 VT = MVT::i8;
17071 break;
17072 case Intrinsic::ppc_altivec_stvehx:
17073 VT = MVT::i16;
17074 break;
17075 case Intrinsic::ppc_altivec_stvewx:
17076 VT = MVT::i32;
17077 break;
17078 case Intrinsic::ppc_vsx_stxvd2x:
17079 case Intrinsic::ppc_vsx_stxvd2x_be:
17080 VT = MVT::v2f64;
17081 break;
17082 default:
17083 VT = MVT::v4i32;
17084 break;
17085 }
17086
17087 Info.opc = ISD::INTRINSIC_VOID;
17088 Info.memVT = VT;
17089 Info.ptrVal = I.getArgOperand(1);
17090 Info.offset = -VT.getStoreSize()+1;
17091 Info.size = 2*VT.getStoreSize()-1;
17092 Info.align = Align(1);
17093 Info.flags = MachineMemOperand::MOStore;
17094 return true;
17095 }
17096 case Intrinsic::ppc_stdcx:
17097 case Intrinsic::ppc_stwcx:
17098 case Intrinsic::ppc_sthcx:
17099 case Intrinsic::ppc_stbcx: {
17100 EVT VT;
17101 auto Alignment = Align(8);
17102 switch (Intrinsic) {
17103 case Intrinsic::ppc_stdcx:
17104 VT = MVT::i64;
17105 break;
17106 case Intrinsic::ppc_stwcx:
17107 VT = MVT::i32;
17108 Alignment = Align(4);
17109 break;
17110 case Intrinsic::ppc_sthcx:
17111 VT = MVT::i16;
17112 Alignment = Align(2);
17113 break;
17114 case Intrinsic::ppc_stbcx:
17115 VT = MVT::i8;
17116 Alignment = Align(1);
17117 break;
17118 }
17119 Info.opc = ISD::INTRINSIC_W_CHAIN;
17120 Info.memVT = VT;
17121 Info.ptrVal = I.getArgOperand(0);
17122 Info.offset = 0;
17123 Info.align = Alignment;
17124 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17125 return true;
17126 }
17127 default:
17128 break;
17129 }
17130
17131 return false;
17132}
17133
17134/// It returns EVT::Other if the type should be determined using generic
17135/// target-independent logic.
17136EVT PPCTargetLowering::getOptimalMemOpType(
17137 const MemOp &Op, const AttributeList &FuncAttributes) const {
17138 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17139 // We should use Altivec/VSX loads and stores when available. For unaligned
17140 // addresses, unaligned VSX loads are only fast starting with the P8.
17141 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17142 if (Op.isMemset() && Subtarget.hasVSX()) {
17143 uint64_t TailSize = Op.size() % 16;
17144 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17145 // element if the vector element type matches the tail store. For a tail
17146 // size of 3 or 4 bytes the tail store is i32, so v8i16 is used instead of v4i32.
17147 if (TailSize > 2 && TailSize <= 4) {
17148 return MVT::v8i16;
17149 }
17150 return MVT::v4i32;
17151 }
17152 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17153 return MVT::v4i32;
17154 }
17155 }
17156
17157 if (Subtarget.isPPC64()) {
17158 return MVT::i64;
17159 }
17160
17161 return MVT::i32;
17162}
17163
17164/// Returns true if it is beneficial to convert a load of a constant
17165/// to just the constant itself.
17166bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17167 Type *Ty) const {
17168 assert(Ty->isIntegerTy());
17169
17170 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17171 return !(BitSize == 0 || BitSize > 64);
17172}
17173
17174bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17175 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17176 return false;
17177 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17178 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17179 return NumBits1 == 64 && NumBits2 == 32;
17180}
17181
17182bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17183 if (!VT1.isInteger() || !VT2.isInteger())
17184 return false;
17185 unsigned NumBits1 = VT1.getSizeInBits();
17186 unsigned NumBits2 = VT2.getSizeInBits();
17187 return NumBits1 == 64 && NumBits2 == 32;
17188}
17189
17190bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17191 // Generally speaking, zexts are not free, but they are free when they can be
17192 // folded with other operations.
17193 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17194 EVT MemVT = LD->getMemoryVT();
17195 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17196 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17197 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17198 LD->getExtensionType() == ISD::ZEXTLOAD))
17199 return true;
17200 }
17201
17202 // FIXME: Add other cases...
17203 // - 32-bit shifts with a zext to i64
17204 // - zext after ctlz, bswap, etc.
17205 // - zext after and by a constant mask
17206
17207 return TargetLowering::isZExtFree(Val, VT2);
17208}
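// Worked example for isZExtFree above (illustrative): (zext (load i8 X))
// to i64 costs nothing extra, since lbz already clears bits 8-63 of the
// destination GPR; the same holds for lhz/lwz and zero-extending loads.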
17209
17210bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17211 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17212 "invalid fpext types");
17213 // Extending to float128 is not free.
17214 if (DestVT == MVT::f128)
17215 return false;
17216 return true;
17217}
17218
17219bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17220 return isInt<16>(Imm) || isUInt<16>(Imm);
17221}
17222
17223bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17224 return isInt<16>(Imm) || isUInt<16>(Imm);
17225}
17226
17227bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17228 MachineMemOperand::Flags,
17229 unsigned *Fast) const {
17230 if (DisablePPCUnaligned)
17231 return false;
17232
17233 // PowerPC supports unaligned memory access for simple non-vector types.
17234 // Although accessing unaligned addresses is not as efficient as accessing
17235 // aligned addresses, it is generally more efficient than manual expansion,
17236 // and generally only traps for software emulation when crossing page
17237 // boundaries.
17238
17239 if (!VT.isSimple())
17240 return false;
17241
17242 if (VT.isFloatingPoint() && !VT.isVector() &&
17243 !Subtarget.allowsUnalignedFPAccess())
17244 return false;
17245
17246 if (VT.getSimpleVT().isVector()) {
17247 if (Subtarget.hasVSX()) {
17248 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17249 VT != MVT::v4f32 && VT != MVT::v4i32)
17250 return false;
17251 } else {
17252 return false;
17253 }
17254 }
17255
17256 if (VT == MVT::ppcf128)
17257 return false;
17258
17259 if (Fast)
17260 *Fast = 1;
17261
17262 return true;
17263}
17264
17265bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17266 SDValue C) const {
17267 // Check integral scalar types.
17268 if (!VT.isScalarInteger())
17269 return false;
17270 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17271 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17272 return false;
17273 // This transformation will generate >= 2 operations. But the following
17274 // cases will generate <= 2 instructions during ISel, so exclude them:
17275 // 1. If the constant multiplier fits in 16 bits, it can be handled by
17276 // one HW instruction, i.e. MULLI.
17277 // 2. If the multiplier fits in 16 bits after shifting out trailing
17278 // zeroes, one more instruction than case 1 is needed, i.e. MULLI and RLDICR.
17279 int64_t Imm = ConstNode->getSExtValue();
17280 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17281 Imm >>= Shift;
17282 if (isInt<16>(Imm))
17283 return false;
17284 uint64_t UImm = static_cast<uint64_t>(Imm);
17285 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17286 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17287 return true;
17288 }
17289 return false;
17290}
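// Worked examples (illustrative): C = 40 (= 5 << 3) strips trailing zeroes
// down to 5, which fits in 16 bits, so we return false and leave it to
// MULLI (+ RLDICR); C = 65537 (= 2^16 + 1) does not fit, and UImm - 1 is a
// power of two, so we return true and decompose into shift + add.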
17291
17292bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17293 EVT VT) const {
17294 return isFMAFasterThanFMulAndFAdd(
17295 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17296}
17297
17298bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17299 Type *Ty) const {
17300 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17301 return false;
17302 switch (Ty->getScalarType()->getTypeID()) {
17303 case Type::FloatTyID:
17304 case Type::DoubleTyID:
17305 return true;
17306 case Type::FP128TyID:
17307 return Subtarget.hasP9Vector();
17308 default:
17309 return false;
17310 }
17311}
17312
17313// FIXME: add more patterns which are not profitable to hoist.
17314bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17315 if (!I->hasOneUse())
17316 return true;
17317
17318 Instruction *User = I->user_back();
17319 assert(User && "A single use instruction with no uses.");
17320
17321 switch (I->getOpcode()) {
17322 case Instruction::FMul: {
17323 // Don't break FMA, PowerPC prefers FMA.
17324 if (User->getOpcode() != Instruction::FSub &&
17325 User->getOpcode() != Instruction::FAdd)
17326 return true;
17327
17328 const TargetOptions &Options = getTargetMachine().Options;
17329 const Function *F = I->getFunction();
17330 const DataLayout &DL = F->getParent()->getDataLayout();
17331 Type *Ty = User->getOperand(0)->getType();
17332
17333 return !(
17334 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17335 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17336 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17337 }
17338 case Instruction::Load: {
17339 // Don't break the "store (load float*)" pattern; it will be combined
17340 // to "store (load int32)" by a later InstCombine pass (see
17341 // combineLoadToOperationType). On PowerPC, loading a floating-point value
17342 // takes more cycles than loading a 32-bit integer.
17343 LoadInst *LI = cast<LoadInst>(I);
17344 // For loads that combineLoadToOperationType leaves alone, such as
17345 // ordered loads, it should be profitable to hoist them.
17346 // A swifterror load can only be of pointer-to-pointer type, so the
17347 // type check below gets rid of that case.
17348 if (!LI->isUnordered())
17349 return true;
17350
17351 if (User->getOpcode() != Instruction::Store)
17352 return true;
17353
17354 if (I->getType()->getTypeID() != Type::FloatTyID)
17355 return true;
17356
17357 return false;
17358 }
17359 default:
17360 return true;
17361 }
17362 return true;
17363}
17364
17365const MCPhysReg *
17366PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17367 // LR is a callee-save register, but we must treat it as clobbered by any call
17368 // site. Hence we include LR in the scratch registers, which are in turn added
17369 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17370 // to CTR, which is used by any indirect call.
17371 static const MCPhysReg ScratchRegs[] = {
17372 PPC::X12, PPC::LR8, PPC::CTR8, 0
17373 };
17374
17375 return ScratchRegs;
17376}
17377
17378Register PPCTargetLowering::getExceptionPointerRegister(
17379 const Constant *PersonalityFn) const {
17380 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17381}
17382
17383Register PPCTargetLowering::getExceptionSelectorRegister(
17384 const Constant *PersonalityFn) const {
17385 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17386}
17387
17388bool
17389PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17390 EVT VT , unsigned DefinedValues) const {
17391 if (VT == MVT::v2i64)
17392 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17393
17394 if (Subtarget.hasVSX())
17395 return true;
17396
17397 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17398}
17399
17400Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17401 if (DisableILPPref || Subtarget.enableMachineScheduler())
17402 return TargetLowering::getSchedulingPreference(N);
17403
17404 return Sched::ILP;
17405}
17406
17407// Create a fast isel object.
17408FastISel *
17409PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17410 const TargetLibraryInfo *LibInfo) const {
17411 return PPC::createFastISel(FuncInfo, LibInfo);
17412}
17413
17414// 'Inverted' means the FMA opcode after negating one multiplicand.
17415// For example, (fma -a b c) = (fnmsub a b c)
17416static unsigned invertFMAOpcode(unsigned Opc) {
17417 switch (Opc) {
17418 default:
17419 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17420 case ISD::FMA:
17421 return PPCISD::FNMSUB;
17422 case PPCISD::FNMSUB:
17423 return ISD::FMA;
17424 }
17425}
17426
17427SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17428 bool LegalOps, bool OptForSize,
17429 NegatibleCost &Cost,
17430 unsigned Depth) const {
17431 if (Depth > SelectionDAG::MaxRecursionDepth)
17432 return SDValue();
17433
17434 unsigned Opc = Op.getOpcode();
17435 EVT VT = Op.getValueType();
17436 SDNodeFlags Flags = Op.getNode()->getFlags();
17437
17438 switch (Opc) {
17439 case PPCISD::FNMSUB:
17440 if (!Op.hasOneUse() || !isTypeLegal(VT))
17441 break;
17442
17443 const TargetOptions &Options = getTargetMachine().Options;
17444 SDValue N0 = Op.getOperand(0);
17445 SDValue N1 = Op.getOperand(1);
17446 SDValue N2 = Op.getOperand(2);
17447 SDLoc Loc(Op);
17448
17449 NegatibleCost N2Cost = NegatibleCost::Expensive;
17450 SDValue NegN2 =
17451 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17452
17453 if (!NegN2)
17454 return SDValue();
17455
17456 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17457 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17458 // These transformations may change sign of zeroes. For example,
17459 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17460 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17461 // Try and choose the cheaper one to negate.
17462 NegatibleCost N0Cost = NegatibleCost::Expensive;
17463 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17464 N0Cost, Depth + 1);
17465
17466 NegatibleCost N1Cost = NegatibleCost::Expensive;
17467 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17468 N1Cost, Depth + 1);
17469
17470 if (NegN0 && N0Cost <= N1Cost) {
17471 Cost = std::min(N0Cost, N2Cost);
17472 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17473 } else if (NegN1) {
17474 Cost = std::min(N1Cost, N2Cost);
17475 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17476 }
17477 }
17478
17479 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17480 if (isOperationLegal(ISD::FMA, VT)) {
17481 Cost = N2Cost;
17482 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17483 }
17484
17485 break;
17486 }
17487
17488 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17489 Cost, Depth);
17490}
17491
17492// Override to enable LOAD_STACK_GUARD lowering on Linux.
17493bool PPCTargetLowering::useLoadStackGuardNode() const {
17494 if (!Subtarget.isTargetLinux())
17495 return TargetLowering::useLoadStackGuardNode();
17496 return true;
17497}
17498
17499// Override to disable global variable loading on Linux and insert AIX canary
17500// word declaration.
17501void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17502 if (Subtarget.isAIXABI()) {
17503 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17504 PointerType::getUnqual(M.getContext()));
17505 return;
17506 }
17507 if (!Subtarget.isTargetLinux())
17508 return TargetLowering::insertSSPDeclarations(M);
17509}
17510
17511Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17512 if (Subtarget.isAIXABI())
17513 return M.getGlobalVariable(AIXSSPCanaryWordName);
17514 return TargetLowering::getSDagStackGuard(M);
17515}
17516
17517bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17518 bool ForCodeSize) const {
17519 if (!VT.isSimple() || !Subtarget.hasVSX())
17520 return false;
17521
17522 switch(VT.getSimpleVT().SimpleTy) {
17523 default:
17524 // For FP types that are currently not supported by PPC backend, return
17525 // false. Examples: f16, f80.
17526 return false;
17527 case MVT::f32:
17528 case MVT::f64: {
17529 if (Subtarget.hasPrefixInstrs()) {
17530 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17531 return true;
17532 }
17533 bool IsExact;
17534 APSInt IntResult(16, false);
17535 // The rounding mode doesn't really matter because we only care about floats
17536 // that can be converted to integers exactly.
17537 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17538 // For exact values in the range [-16, 15] we can materialize the float.
17539 if (IsExact && IntResult <= 15 && IntResult >= -16)
17540 return true;
17541 return Imm.isZero();
17542 }
17543 case MVT::ppcf128:
17544 return Imm.isPosZero();
17545 }
17546}
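// Worked example (illustrative): without prefixed instructions, +4.0
// converts exactly to 4, which lies in [-16, 15], so it is legal as an
// immediate; 0.5 does not convert exactly and is nonzero, so it falls back
// to a constant-pool load.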
17547
17548// For vector shift operation op, fold
17549// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17550static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17551 SelectionDAG &DAG) {
17552 SDValue N0 = N->getOperand(0);
17553 SDValue N1 = N->getOperand(1);
17554 EVT VT = N0.getValueType();
17555 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17556 unsigned Opcode = N->getOpcode();
17557 unsigned TargetOpcode;
17558
17559 switch (Opcode) {
17560 default:
17561 llvm_unreachable("Unexpected shift operation");
17562 case ISD::SHL:
17563 TargetOpcode = PPCISD::SHL;
17564 break;
17565 case ISD::SRL:
17566 TargetOpcode = PPCISD::SRL;
17567 break;
17568 case ISD::SRA:
17569 TargetOpcode = PPCISD::SRA;
17570 break;
17571 }
17572
17573 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17574 N1->getOpcode() == ISD::AND)
17575 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17576 if (Mask->getZExtValue() == OpSizeInBits - 1)
17577 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17578
17579 return SDValue();
17580}
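// Worked example (illustrative): for v4i32, (shl x, (and y, splat(31)))
// has a mask equal to numbits(i32) - 1; since vslw already uses each shift
// amount modulo 32, the AND is redundant and (PPCISD::SHL x, y) is emitted.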
17581
17582SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17583 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17584 return Value;
17585
17586 SDValue N0 = N->getOperand(0);
17587 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17588 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17589 N0.getOpcode() != ISD::SIGN_EXTEND ||
17590 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17591 N->getValueType(0) != MVT::i64)
17592 return SDValue();
17593
17594 // We can't save an operation here if the value is already extended, and
17595 // the existing shift is easier to combine.
17596 SDValue ExtsSrc = N0.getOperand(0);
17597 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17598 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17599 return SDValue();
17600
17601 SDLoc DL(N0);
17602 SDValue ShiftBy = SDValue(CN1, 0);
17603 // We want the shift amount to be i32 on the extswsli, but the shift
17604 // amount could be an i64.
17605 if (ShiftBy.getValueType() == MVT::i64)
17606 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17607
17608 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17609 ShiftBy);
17610}
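// Worked example (illustrative): on a 64-bit ISA 3.0 (Power9) target,
// (shl (sign_extend i32:x), 3) becomes (PPCISD::EXTSWSLI x, 3), selecting
// a single extswsli in place of an extsw followed by a shift.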
17611
17612SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17613 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17614 return Value;
17615
17616 return SDValue();
17617}
17618
17619SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17620 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17621 return Value;
17622
17623 return SDValue();
17624}
17625
17626// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17627// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17628// When C is zero, the equation (addi Z, -C) can be simplified to Z
17629// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17630static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17631 const PPCSubtarget &Subtarget) {
17632 if (!Subtarget.isPPC64())
17633 return SDValue();
17634
17635 SDValue LHS = N->getOperand(0);
17636 SDValue RHS = N->getOperand(1);
17637
17638 auto isZextOfCompareWithConstant = [](SDValue Op) {
17639 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17640 Op.getValueType() != MVT::i64)
17641 return false;
17642
17643 SDValue Cmp = Op.getOperand(0);
17644 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17645 Cmp.getOperand(0).getValueType() != MVT::i64)
17646 return false;
17647
17648 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17649 int64_t NegConstant = 0 - Constant->getSExtValue();
17650 // Due to the limitations of the addi instruction,
17651 // -C is required to be [-32768, 32767].
17652 return isInt<16>(NegConstant);
17653 }
17654
17655 return false;
17656 };
17657
17658 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17659 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17660
17661 // If there is a pattern, canonicalize a zext operand to the RHS.
17662 if (LHSHasPattern && !RHSHasPattern)
17663 std::swap(LHS, RHS);
17664 else if (!LHSHasPattern && !RHSHasPattern)
17665 return SDValue();
17666
17667 SDLoc DL(N);
17668 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17669 SDValue Cmp = RHS.getOperand(0);
17670 SDValue Z = Cmp.getOperand(0);
17671 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17672 int64_t NegConstant = 0 - Constant->getSExtValue();
17673
17674 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17675 default: break;
17676 case ISD::SETNE: {
17677 // when C == 0
17678 // --> addze X, (addic Z, -1).carry
17679 // /
17680 // add X, (zext(setne Z, C))--
17681 // \ when -32768 <= -C <= 32767 && C != 0
17682 // --> addze X, (addic (addi Z, -C), -1).carry
17683 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17684 DAG.getConstant(NegConstant, DL, MVT::i64));
17685 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17686 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17687 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17688 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17689 SDValue(Addc.getNode(), 1));
17690 }
17691 case ISD::SETEQ: {
17692 // when C == 0
17693 // --> addze X, (subfic Z, 0).carry
17694 // /
17695 // add X, (zext(sete Z, C))--
17696 // \ when -32768 <= -C <= 32767 && C != 0
17697 // --> addze X, (subfic (addi Z, -C), 0).carry
17698 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17699 DAG.getConstant(NegConstant, DL, MVT::i64));
17700 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17701 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17702 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17703 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17704 SDValue(Subc.getNode(), 1));
17705 }
17706 }
17707
17708 return SDValue();
17709}
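// Worked example (illustrative): for (add X, (zext (setne Z, 0))) the
// SETNE case emits addic t, Z, -1, whose carry-out is 1 exactly when
// Z != 0, then addze picks up the carry, yielding X + (Z != 0) without a
// compare-and-select sequence.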
17710
17711// Transform
17712// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17713// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17714// In this case both C1 and C2 must be known constants.
17715// C1+C2 must fit into a 34 bit signed integer.
17716static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17717 const PPCSubtarget &Subtarget) {
17718 if (!Subtarget.isUsingPCRelativeCalls())
17719 return SDValue();
17720
17721 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17722 // If we find that node try to cast the Global Address and the Constant.
17723 SDValue LHS = N->getOperand(0);
17724 SDValue RHS = N->getOperand(1);
17725
17726 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17727 std::swap(LHS, RHS);
17728
17729 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17730 return SDValue();
17731
17732 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17733 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17734 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17735
17736 // Check that both casts succeeded.
17737 if (!GSDN || !ConstNode)
17738 return SDValue();
17739
17740 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17741 SDLoc DL(GSDN);
17742
17743 // The signed int offset needs to fit in 34 bits.
17744 if (!isInt<34>(NewOffset))
17745 return SDValue();
17746
17747 // The new global address is a copy of the old global address except
17748 // that it has the updated Offset.
17749 SDValue GA =
17750 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17751 NewOffset, GSDN->getTargetFlags());
17752 SDValue MatPCRel =
17753 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17754 return MatPCRel;
17755}
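// Worked example (illustrative): (add (MAT_PCREL_ADDR foo+8), 16) folds to
// (MAT_PCREL_ADDR foo+24), which can then be materialized by a single
// PC-relative paddi.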
17756
17757SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17758 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17759 return Value;
17760
17761 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17762 return Value;
17763
17764 return SDValue();
17765}
17766
17767// Detect TRUNCATE operations on bitcasts of float128 values.
17768// What we are looking for here is the situation where we extract a subset
17769// of bits from a 128-bit float.
17770// This can be of two forms:
17771// 1) BITCAST of f128 feeding TRUNCATE
17772// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17773// The reason this is required is because we do not have a legal i128 type
17774// and so we want to prevent having to store the f128 and then reload part
17775// of it.
17776SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17777 DAGCombinerInfo &DCI) const {
17778 // If we are using CRBits then try that first.
17779 if (Subtarget.useCRBits()) {
17780 // Check if CRBits did anything and return that if it did.
17781 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17782 return CRTruncValue;
17783 }
17784
17785 SDLoc dl(N);
17786 SDValue Op0 = N->getOperand(0);
17787
17788 // Looking for a truncate of i128 to i64.
17789 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17790 return SDValue();
17791
17792 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17793
17794 // SRL feeding TRUNCATE.
17795 if (Op0.getOpcode() == ISD::SRL) {
17796 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17797 // The right shift has to be by 64 bits.
17798 if (!ConstNode || ConstNode->getZExtValue() != 64)
17799 return SDValue();
17800
17801 // Switch the element number to extract.
17802 EltToExtract = EltToExtract ? 0 : 1;
17803 // Update Op0 past the SRL.
17804 Op0 = Op0.getOperand(0);
17805 }
17806
17807 // BITCAST feeding a TRUNCATE possibly via SRL.
17808 if (Op0.getOpcode() == ISD::BITCAST &&
17809 Op0.getValueType() == MVT::i128 &&
17810 Op0.getOperand(0).getValueType() == MVT::f128) {
17811 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17812 return DCI.DAG.getNode(
17813 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
17814 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17815 }
17816 return SDValue();
17817}
17818
17819SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17820 SelectionDAG &DAG = DCI.DAG;
17821
17822 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17823 if (!ConstOpOrElement)
17824 return SDValue();
17825
17826 // An imul is usually smaller than the alternative sequence for legal type.
17827 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17828 isOperationLegal(ISD::MUL, N->getValueType(0)))
17829 return SDValue();
17830
17831 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17832 switch (this->Subtarget.getCPUDirective()) {
17833 default:
17834 // TODO: enhance the condition for subtarget before pwr8
17835 return false;
17836 case PPC::DIR_PWR8:
17837 // type mul add shl
17838 // scalar 4 1 1
17839 // vector 7 2 2
17840 return true;
17841 case PPC::DIR_PWR9:
17842 case PPC::DIR_PWR10:
17843 case PPC::DIR_PWR_FUTURE:
17844 // type mul add shl
17845 // scalar 5 2 2
17846 // vector 7 2 2
17847
17848 // The cycle ratios of the related operations are shown in the table
17849 // above: mul is 5 (scalar) / 7 (vector), and add/sub/shl are all 2 for
17850 // both scalar and vector types. For 2-instruction patterns, add/sub +
17851 // shl costs 4, which is always profitable; but for the 3-instruction
17852 // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl
17853 // costs 6, so it is only profitable for vector types.
17854 return IsAddOne && IsNeg ? VT.isVector() : true;
17855 }
17856 };
17857
17858 EVT VT = N->getValueType(0);
17859 SDLoc DL(N);
17860
17861 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17862 bool IsNeg = MulAmt.isNegative();
17863 APInt MulAmtAbs = MulAmt.abs();
17864
17865 if ((MulAmtAbs - 1).isPowerOf2()) {
17866 // (mul x, 2^N + 1) => (add (shl x, N), x)
17867 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17868
17869 if (!IsProfitable(IsNeg, true, VT))
17870 return SDValue();
17871
17872 SDValue Op0 = N->getOperand(0);
17873 SDValue Op1 =
17874 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17875 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17876 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17877
17878 if (!IsNeg)
17879 return Res;
17880
17881 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17882 } else if ((MulAmtAbs + 1).isPowerOf2()) {
17883 // (mul x, 2^N - 1) => (sub (shl x, N), x)
17884 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17885
17886 if (!IsProfitable(IsNeg, false, VT))
17887 return SDValue();
17888
17889 SDValue Op0 = N->getOperand(0);
17890 SDValue Op1 =
17891 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17892 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17893
17894 if (!IsNeg)
17895 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
17896 else
17897 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
17898
17899 } else {
17900 return SDValue();
17901 }
17902}
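// Worked examples (illustrative): (mul x, 9) has |9| - 1 = 8 = 2^3 and
// becomes (add (shl x, 3), x); (mul x, 7) has |7| + 1 = 8 and becomes
// (sub (shl x, 3), x); (mul x, -9) wraps the first form in (sub 0, ...).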
17903
17904// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
17905// in combiner since we need to check SD flags and other subtarget features.
17906SDValue PPCTargetLowering::combineFMALike(SDNode *N,
17907 DAGCombinerInfo &DCI) const {
17908 SDValue N0 = N->getOperand(0);
17909 SDValue N1 = N->getOperand(1);
17910 SDValue N2 = N->getOperand(2);
17911 SDNodeFlags Flags = N->getFlags();
17912 EVT VT = N->getValueType(0);
17913 SelectionDAG &DAG = DCI.DAG;
17914 const TargetOptions &Options = getTargetMachine().Options;
17915 unsigned Opc = N->getOpcode();
17916 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
17917 bool LegalOps = !DCI.isBeforeLegalizeOps();
17918 SDLoc Loc(N);
17919
17920 if (!isOperationLegal(ISD::FMA, VT))
17921 return SDValue();
17922
17923 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
17924 // since (fnmsub a b c)=-0 while c-ab=+0.
17925 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
17926 return SDValue();
17927
17928 // (fma (fneg a) b c) => (fnmsub a b c)
17929 // (fnmsub (fneg a) b c) => (fma a b c)
17930 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
17931 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
17932
17933 // (fma a (fneg b) c) => (fnmsub a b c)
17934 // (fnmsub a (fneg b) c) => (fma a b c)
17935 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
17936 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
17937
17938 return SDValue();
17939}
17940
17941bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17942 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
17943 if (!Subtarget.is64BitELFABI())
17944 return false;
17945
17946 // If not a tail call then no need to proceed.
17947 if (!CI->isTailCall())
17948 return false;
17949
17950 // If sibling calls have been disabled and tail-calls aren't guaranteed
17951 // there is no reason to duplicate.
17952 auto &TM = getTargetMachine();
17953 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17954 return false;
17955
17956 // Can't tail call a function called indirectly, or if it has variadic args.
17957 const Function *Callee = CI->getCalledFunction();
17958 if (!Callee || Callee->isVarArg())
17959 return false;
17960
17961 // Make sure the callee and caller calling conventions are eligible for tco.
17962 const Function *Caller = CI->getParent()->getParent();
17963 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17964 CI->getCallingConv()))
17965 return false;
17966
17967 // If the function is local then we have a good chance at tail-calling it
17968 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17969}
17970
17971bool PPCTargetLowering::
17972isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17973 const Value *Mask = AndI.getOperand(1);
17974 // If the mask is suitable for andi. or andis. we should sink the and.
17975 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17976 // Can't handle constants wider than 64-bits.
17977 if (CI->getBitWidth() > 64)
17978 return false;
17979 int64_t ConstVal = CI->getZExtValue();
17980 return isUInt<16>(ConstVal) ||
17981 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17982 }
17983
17984 // For non-constant masks, we can always use the record-form and.
17985 return true;
17986}
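// Worked examples (illustrative): a mask of 0xFFFF is beneficial (it fits
// andi.), as is 0xFFFF0000 (it fits andis.), but 0x00010001 is not: it
// spans both halfwords and would need an extra instruction to materialize.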
17987
17988/// getAddrModeForFlags - Based on the set of address flags, select the most
17989/// optimal instruction format to match by.
17990PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17991 // This is not a node we should be handling here.
17992 if (Flags == PPC::MOF_None)
17993 return PPC::AM_None;
17994 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
17995 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17996 if ((Flags & FlagSet) == FlagSet)
17997 return PPC::AM_DForm;
17998 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17999 if ((Flags & FlagSet) == FlagSet)
18000 return PPC::AM_DSForm;
18001 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18002 if ((Flags & FlagSet) == FlagSet)
18003 return PPC::AM_DQForm;
18004 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18005 if ((Flags & FlagSet) == FlagSet)
18006 return PPC::AM_PrefixDForm;
18007 // If no other forms are selected, return an X-Form as it is the most
18008 // general addressing mode.
18009 return PPC::AM_XForm;
18010}
18011
18012/// Set alignment flags based on whether or not the Frame Index is aligned.
18013/// Utilized when computing flags for address computation when selecting
18014/// load and store instructions.
18015static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18016 SelectionDAG &DAG) {
18017 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18018 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18019 if (!FI)
18020 return;
18021 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18022 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18023 // If this is (add $FI, $S16Imm), the alignment flags are already set
18024 // based on the immediate. We just need to clear the alignment flags
18025 // if the FI alignment is weaker.
18026 if ((FrameIndexAlign % 4) != 0)
18027 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18028 if ((FrameIndexAlign % 16) != 0)
18029 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18030 // If the address is a plain FrameIndex, set alignment flags based on
18031 // FI alignment.
18032 if (!IsAdd) {
18033 if ((FrameIndexAlign % 4) == 0)
18034 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18035 if ((FrameIndexAlign % 16) == 0)
18036 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18037 }
18038}
18039
18040/// Given a node, compute flags that are used for address computation when
18041/// selecting load and store instructions. The flags computed are stored in
18042/// FlagSet. This function takes into account whether the node is a constant,
18043/// an ADD, or an OR, and computes the address flags accordingly.
18044static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18045 SelectionDAG &DAG) {
18046 // Set the alignment flags for the node depending on if the node is
18047 // 4-byte or 16-byte aligned.
18048 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18049 if ((Imm & 0x3) == 0)
18050 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18051 if ((Imm & 0xf) == 0)
18052 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18053 };
18054
18055 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18056 // All 32-bit constants can be computed as LIS + Disp.
18057 const APInt &ConstImm = CN->getAPIntValue();
18058 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18059 FlagSet |= PPC::MOF_AddrIsSImm32;
18060 SetAlignFlagsForImm(ConstImm.getZExtValue());
18061 setAlignFlagsForFI(N, FlagSet, DAG);
18062 }
18063 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18064 FlagSet |= PPC::MOF_RPlusSImm34;
18065 else // Let constant materialization handle large constants.
18066 FlagSet |= PPC::MOF_NotAddNorCst;
18067 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18068 // This address can be represented as an addition of:
18069 // - Register + Imm16 (possibly a multiple of 4/16)
18070 // - Register + Imm34
18071 // - Register + PPCISD::Lo
18072 // - Register + Register
18073 // In any case, we won't have to match this as Base + Zero.
18074 SDValue RHS = N.getOperand(1);
18075 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18076 const APInt &ConstImm = CN->getAPIntValue();
18077 if (ConstImm.isSignedIntN(16)) {
18078 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18079 SetAlignFlagsForImm(ConstImm.getZExtValue());
18080 setAlignFlagsForFI(N, FlagSet, DAG);
18081 }
18082 if (ConstImm.isSignedIntN(34))
18083 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18084 else
18085 FlagSet |= PPC::MOF_RPlusR; // Register.
18086 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18087 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18088 else
18089 FlagSet |= PPC::MOF_RPlusR;
18090 } else { // The address computation is not a constant or an addition.
18091 setAlignFlagsForFI(N, FlagSet, DAG);
18092 FlagSet |= PPC::MOF_NotAddNorCst;
18093 }
18094}
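// Worked example (illustrative): for N = (add FI, 24) the RHS fits in a
// signed 16-bit immediate, and 24 is a multiple of 4 but not of 16, so
// FlagSet gains MOF_RPlusSImm16 and MOF_RPlusSImm16Mult4: eligible for
// DS-Form (ld/std) but not DQ-Form (lxv/stxv).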
18095
18096static bool isPCRelNode(SDValue N) {
18097 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18098 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18099 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18100 isValidPCRelNode<JumpTableSDNode>(N) ||
18101 isValidPCRelNode<BlockAddressSDNode>(N));
18102}
18103
18104/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18105/// the address flags of the load/store instruction that is to be matched.
18106unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18107 SelectionDAG &DAG) const {
18108 unsigned FlagSet = PPC::MOF_None;
18109
18110 // Compute subtarget flags.
18111 if (!Subtarget.hasP9Vector())
18112 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18113 else {
18114 FlagSet |= PPC::MOF_SubtargetP9;
18115 if (Subtarget.hasPrefixInstrs())
18116 FlagSet |= PPC::MOF_SubtargetP10;
18117 }
18118 if (Subtarget.hasSPE())
18119 FlagSet |= PPC::MOF_SubtargetSPE;
18120
18121 // Check if we have a PCRel node and return early.
18122 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18123 return FlagSet;
18124
18125 // If the node is the paired load/store intrinsics, compute flags for
18126 // address computation and return early.
18127 unsigned ParentOp = Parent->getOpcode();
18128 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18129 (ParentOp == ISD::INTRINSIC_VOID))) {
18130 unsigned ID = Parent->getConstantOperandVal(1);
18131 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18132 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18133 ? Parent->getOperand(2)
18134 : Parent->getOperand(3);
18135 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18136 FlagSet |= PPC::MOF_Vector;
18137 return FlagSet;
18138 }
18139 }
18140
18141 // Mark this as something we don't want to handle here if it is atomic
18142 // or pre-increment instruction.
18143 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18144 if (LSB->isIndexed())
18145 return PPC::MOF_None;
18146
18147 // Compute in-memory type flags. This is based on if there are scalars,
18148 // floats or vectors.
18149 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18150 assert(MN && "Parent should be a MemSDNode!");
18151 EVT MemVT = MN->getMemoryVT();
18152 unsigned Size = MemVT.getSizeInBits();
18153 if (MemVT.isScalarInteger()) {
18154 assert(Size <= 128 &&
18155 "Not expecting scalar integers larger than 16 bytes!");
18156 if (Size < 32)
18157 FlagSet |= PPC::MOF_SubWordInt;
18158 else if (Size == 32)
18159 FlagSet |= PPC::MOF_WordInt;
18160 else
18161 FlagSet |= PPC::MOF_DoubleWordInt;
18162 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18163 if (Size == 128)
18164 FlagSet |= PPC::MOF_Vector;
18165 else if (Size == 256) {
18166 assert(Subtarget.pairedVectorMemops() &&
18167 "256-bit vectors are only available when paired vector memops is "
18168 "enabled!");
18169 FlagSet |= PPC::MOF_Vector;
18170 } else
18171 llvm_unreachable("Not expecting illegal vectors!");
18172 } else { // Floating point type: can be scalar, f128 or vector types.
18173 if (Size == 32 || Size == 64)
18174 FlagSet |= PPC::MOF_ScalarFloat;
18175 else if (MemVT == MVT::f128 || MemVT.isVector())
18176 FlagSet |= PPC::MOF_Vector;
18177 else
18178 llvm_unreachable("Not expecting illegal scalar floats!");
18179 }
18180
18181 // Compute flags for address computation.
18182 computeFlagsForAddressComputation(N, FlagSet, DAG);
18183
18184 // Compute type extension flags.
18185 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18186 switch (LN->getExtensionType()) {
18187 case ISD::SEXTLOAD:
18188 FlagSet |= PPC::MOF_SExt;
18189 break;
18190 case ISD::EXTLOAD:
18191 case ISD::ZEXTLOAD:
18192 FlagSet |= PPC::MOF_ZExt;
18193 break;
18194 case ISD::NON_EXTLOAD:
18195 FlagSet |= PPC::MOF_NoExt;
18196 break;
18197 }
18198 } else
18199 FlagSet |= PPC::MOF_NoExt;
18200
18201 // For integers, no extension is the same as zero extension.
18202 // We set the extension mode to zero extension so we don't have
18203 // to add separate entries in AddrModesMap for loads and stores.
18204 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18205 FlagSet |= PPC::MOF_ZExt;
18206 FlagSet &= ~PPC::MOF_NoExt;
18207 }
18208
18209 // If we don't have prefixed instructions, 34-bit constants should be
18210 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18211 bool IsNonP1034BitConst =
18212 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18213 FlagSet) == PPC::MOF_RPlusSImm34;
18214 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18215 IsNonP1034BitConst)
18216 FlagSet |= PPC::MOF_NotAddNorCst;
18217
18218 return FlagSet;
18219}
18220
18221/// SelectForceXFormMode - Given the specified address, force it to be
18222/// represented as an indexed [r+r] operation (an XForm instruction).
18223PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18224 SDValue &Base,
18225 SelectionDAG &DAG) const {
18226
18227 PPC::AddrMode Mode = PPC::AM_XForm;
18228 int16_t ForceXFormImm = 0;
18229 if (provablyDisjointOr(DAG, N) &&
18230 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18231 Disp = N.getOperand(0);
18232 Base = N.getOperand(1);
18233 return Mode;
18234 }
18235
18236 // If the address is the result of an add, we will utilize the fact that the
18237 // address calculation includes an implicit add. However, we can reduce
18238 // register pressure if we do not materialize a constant just for use as the
18239 // index register. We only get rid of the add if it is not an add of a
18240 // value and a 16-bit signed constant and both have a single use.
18241 if (N.getOpcode() == ISD::ADD &&
18242 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18243 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18244 Disp = N.getOperand(0);
18245 Base = N.getOperand(1);
18246 return Mode;
18247 }
18248
18249 // Otherwise, use R0 as the base register.
18250 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18251 N.getValueType());
18252 Base = N;
18253
18254 return Mode;
18255}
18256
18257bool PPCTargetLowering::splitValueIntoRegisterParts(
18258 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18259 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18260 EVT ValVT = Val.getValueType();
18261 // If we are splitting a scalar integer into f64 parts (i.e. so they
18262 // can be placed into VFRC registers), we need to zero extend and
18263 // bitcast the values. This will ensure the value is placed into a
18264 // VSR using direct moves or stack operations as needed.
18265 if (PartVT == MVT::f64 &&
18266 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18267 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18268 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18269 Parts[0] = Val;
18270 return true;
18271 }
18272 return false;
18273}
18274
18275SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18276 SelectionDAG &DAG) const {
18277 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18278 TargetLowering::CallLoweringInfo CLI(DAG);
18279 EVT RetVT = Op.getValueType();
18280 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18281 SDValue Callee =
18282 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18283 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18284 TargetLowering::ArgListTy Args;
18285 TargetLowering::ArgListEntry Entry;
18286 for (const SDValue &N : Op->op_values()) {
18287 EVT ArgVT = N.getValueType();
18288 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18289 Entry.Node = N;
18290 Entry.Ty = ArgTy;
18291 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18292 Entry.IsZExt = !Entry.IsSExt;
18293 Args.push_back(Entry);
18294 }
18295
18296 SDValue InChain = DAG.getEntryNode();
18297 SDValue TCChain = InChain;
18298 const Function &F = DAG.getMachineFunction().getFunction();
18299 bool isTailCall =
18300 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18301 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18302 if (isTailCall)
18303 InChain = TCChain;
18304 CLI.setDebugLoc(SDLoc(Op))
18305 .setChain(InChain)
18306 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18307 .setTailCall(isTailCall)
18308 .setSExtResult(SignExtend)
18309 .setZExtResult(!SignExtend)
18310 .setIsPostTypeLegalization(true);
18311 return TLI.LowerCallTo(CLI).first;
18312}
18313
18314SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18315 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18316 SelectionDAG &DAG) const {
18317 if (Op.getValueType() == MVT::f32)
18318 return lowerToLibCall(LibCallFloatName, Op, DAG);
18319
18320 if (Op.getValueType() == MVT::f64)
18321 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18322
18323 return SDValue();
18324}
18325
18326bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18327 SDNodeFlags Flags = Op.getNode()->getFlags();
18328 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18329 Flags.hasNoNaNs() && Flags.hasNoInfs();
18330}
18331
18332bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18333 return Op.getNode()->getFlags().hasApproximateFuncs();
18334}
18335
18336bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18337 return getTargetMachine().Options.PPCGenScalarMASSEntries;
18338}
18339
18340SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18341 const char *LibCallFloatName,
18342 const char *LibCallDoubleNameFinite,
18343 const char *LibCallFloatNameFinite,
18344 SDValue Op,
18345 SelectionDAG &DAG) const {
18346 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18347 return SDValue();
18348
18349 if (!isLowringToMASSFiniteSafe(Op))
18350 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18351 DAG);
18352
18353 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18354 LibCallDoubleNameFinite, Op, DAG);
18355}
18356
18357SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18358 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18359 "__xl_powf_finite", Op, DAG);
18360}
18361
18362SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18363 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18364 "__xl_sinf_finite", Op, DAG);
18365}
18366
18367SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18368 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18369 "__xl_cosf_finite", Op, DAG);
18370}
18371
18372SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18373 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18374 "__xl_logf_finite", Op, DAG);
18375}
18376
18377SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18378 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18379 "__xl_log10f_finite", Op, DAG);
18380}
18381
18382SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18383 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18384 "__xl_expf_finite", Op, DAG);
18385}
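// As an illustration of the hooks above (assuming fast-math-style flags):
// a pow call carrying the afn (approximate-functions) flag is redirected to
// __xl_pow, and when nnan, ninf and nsz also hold it is redirected to
// __xl_pow_finite instead, in both cases only if scalar MASS conversion is
// enabled for the target machine.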
18386
18387// If we happen to match to an aligned D-Form, check if the Frame Index is
18388// adequately aligned. If it is not, reset the mode to match to X-Form.
18389static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18390 PPC::AddrMode &Mode) {
18391 if (!isa<FrameIndexSDNode>(N))
18392 return;
18393 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18394 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18395 Mode = PPC::AM_XForm;
18396}
18397
18398/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18399/// compute the address flags of the node, get the optimal address mode based
18400/// on the flags, and set the Base and Disp based on the address mode.
18401PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18402 SDValue N, SDValue &Disp,
18403 SDValue &Base,
18404 SelectionDAG &DAG,
18405 MaybeAlign Align) const {
18406 SDLoc DL(Parent);
18407
18408 // Compute the address flags.
18409 unsigned Flags = computeMOFlags(Parent, N, DAG);
18410
18411 // Get the optimal address mode based on the Flags.
18412 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18413
18414 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18415 // Select an X-Form load if it is not.
18416 setXFormForUnalignedFI(N, Flags, Mode);
18417
18418 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18419 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18420 assert(Subtarget.isUsingPCRelativeCalls() &&
18421 "Must be using PC-Relative calls when a valid PC-Relative node is "
18422 "present!");
18423 Mode = PPC::AM_PCRel;
18424 }
18425
18426 // Set Base and Disp accordingly depending on the address mode.
18427 switch (Mode) {
18428 case PPC::AM_DForm:
18429 case PPC::AM_DSForm:
18430 case PPC::AM_DQForm: {
18431 // This is a register plus a 16-bit immediate. The base will be the
18432 // register and the displacement will be the immediate unless it
18433 // isn't sufficiently aligned.
18434 if (Flags & PPC::MOF_RPlusSImm16) {
18435 SDValue Op0 = N.getOperand(0);
18436 SDValue Op1 = N.getOperand(1);
18437 int16_t Imm = Op1->getAsZExtVal();
18438 if (!Align || isAligned(*Align, Imm)) {
18439 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18440 Base = Op0;
18441 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18442 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18443 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18444 }
18445 break;
18446 }
18447 }
18448 // This is a register plus the @lo relocation. The base is the register
18449 // and the displacement is the global address.
18450 else if (Flags & PPC::MOF_RPlusLo) {
18451 Disp = N.getOperand(1).getOperand(0); // The global address.
18452 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18453 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18454 Disp.getOpcode() == ISD::TargetConstantPool ||
18455 Disp.getOpcode() == ISD::TargetJumpTable);
18456 Base = N.getOperand(0);
18457 break;
18458 }
18459 // This is a constant address at most 32 bits. The base will be
18460 // zero or load-immediate-shifted and the displacement will be
18461 // the low 16 bits of the address.
18462 else if (Flags & PPC::MOF_AddrIsSImm32) {
18463 auto *CN = cast<ConstantSDNode>(N);
18464 EVT CNType = CN->getValueType(0);
18465 uint64_t CNImm = CN->getZExtValue();
18466 // If this address fits entirely in a 16-bit sext immediate field, codegen
18467 // this as "d, 0".
18468 int16_t Imm;
18469 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18470 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18471 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18472 CNType);
18473 break;
18474 }
18475 // Handle 32-bit sext immediate with LIS + Addr mode.
18476 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18477 (!Align || isAligned(*Align, CNImm))) {
18478 int32_t Addr = (int32_t)CNImm;
18479 // Otherwise, break this down into LIS + Disp.
18480 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18481 Base =
18482 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18483 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18484 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18485 break;
18486 }
18487 }
18488 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
18489 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18490 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18491 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18492 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18493 } else
18494 Base = N;
18495 break;
18496 }
18497 case PPC::AM_PrefixDForm: {
18498 int64_t Imm34 = 0;
18499 unsigned Opcode = N.getOpcode();
18500 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18501 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18502 // N is an Add/OR node, and its operand is a 34-bit signed immediate.
18503 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18504 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18505 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18506 else
18507 Base = N.getOperand(0);
18508 } else if (isIntS34Immediate(N, Imm34)) {
18509 // The address is a 34-bit signed immediate.
18510 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18511 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18512 }
18513 break;
18514 }
18515 case PPC::AM_PCRel: {
18516 // When selecting PC-Relative instructions, "Base" is not utilized as
18517 // we select the address as [PC+imm].
18518 Disp = N;
18519 break;
18520 }
18521 case PPC::AM_None:
18522 break;
18523 default: { // By default, X-Form is always available to be selected.
18524 // When a frame index is not aligned, we also match by XForm.
18525 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18526 Base = FI ? N : N.getOperand(1);
18527 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18528 N.getValueType())
18529 : N.getOperand(0);
18530 break;
18531 }
18532 }
18533 return Mode;
18534}
18535
18536CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18537 bool Return,
18538 bool IsVarArg) const {
18539 switch (CC) {
18540 case CallingConv::Cold:
18541 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18542 default:
18543 return CC_PPC64_ELF;
18544 }
18545}
18546
18547bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18548 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18549}
18550
18551TargetLowering::AtomicExpansionKind
18552PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18553 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18554 if (shouldInlineQuadwordAtomics() && Size == 128)
18555 return AtomicExpansionKind::MaskedIntrinsic;
18556
18557 switch (AI->getOperation()) {
18558 case AtomicRMWInst::UIncWrap:
18559 case AtomicRMWInst::UDecWrap:
18560 return AtomicExpansionKind::CmpXChg;
18561 default:
18562 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18563 }
18564
18565 llvm_unreachable("unreachable atomicrmw operation");
18566}
18567
18568TargetLowering::AtomicExpansionKind
18569PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18570 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18571 if (shouldInlineQuadwordAtomics() && Size == 128)
18572 return AtomicExpansionKind::MaskedIntrinsic;
18573 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18574}
18575
18576static Intrinsic::ID
18577getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18578 switch (BinOp) {
18579 default:
18580 llvm_unreachable("Unexpected AtomicRMW BinOp");
18581 case AtomicRMWInst::Xchg:
18582 return Intrinsic::ppc_atomicrmw_xchg_i128;
18583 case AtomicRMWInst::Add:
18584 return Intrinsic::ppc_atomicrmw_add_i128;
18585 case AtomicRMWInst::Sub:
18586 return Intrinsic::ppc_atomicrmw_sub_i128;
18587 case AtomicRMWInst::And:
18588 return Intrinsic::ppc_atomicrmw_and_i128;
18589 case AtomicRMWInst::Or:
18590 return Intrinsic::ppc_atomicrmw_or_i128;
18591 case AtomicRMWInst::Xor:
18592 return Intrinsic::ppc_atomicrmw_xor_i128;
18593 case AtomicRMWInst::Nand:
18594 return Intrinsic::ppc_atomicrmw_nand_i128;
18595 }
18596}
18597
18598Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18599 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18600 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18601 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18602 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18603 Type *ValTy = Incr->getType();
18604 assert(ValTy->getPrimitiveSizeInBits() == 128);
18605 Function *RMW = Intrinsic::getDeclaration(
18606 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18607 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18608 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18609 Value *IncrHi =
18610 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18611 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18612 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18613 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18614 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18615 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18616 return Builder.CreateOr(
18617 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18618}
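// Roughly, for a 128-bit atomicrmw add this builds IR along these lines
// (names illustrative):
//   %lohi = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(
//               ptr %aligned_addr, i64 %incr_lo, i64 %incr_hi)
// followed by two extractvalues and the zext/shl/or recombination above.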
18619
18620Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18621 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18622 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18623 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18624 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18625 Type *ValTy = CmpVal->getType();
18626 assert(ValTy->getPrimitiveSizeInBits() == 128);
18627 Function *IntCmpXchg =
18628 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18629 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18630 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18631 Value *CmpHi =
18632 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18633 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18634 Value *NewHi =
18635 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18636 emitLeadingFence(Builder, CI, Ord);
18637 Value *LoHi =
18638 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18639 emitTrailingFence(Builder, CI, Ord);
18640 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18641 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18642 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18643 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18644 return Builder.CreateOr(
18645 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18646}
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef TM
pre isel intrinsic Pre ISel Intrinsic Lowering
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
bool isDenormal() const
Definition: APFloat.h:1296
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1379
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1485
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1302
APInt abs() const
Get the absolute value.
Definition: APInt.h:1730
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1664
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:521
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:726
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:738
@ Add
*p = old + v
Definition: Instructions.h:742
@ Or
*p = old | v
Definition: Instructions.h:750
@ Sub
*p = old - v
Definition: Instructions.h:744
@ And
*p = old & v
Definition: Instructions.h:746
@ Xor
*p = old ^ v
Definition: Instructions.h:752
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:778
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:782
@ Nand
*p = ~(old & v)
Definition: Instructions.h:748
BinOp getOperation() const
Definition: Instructions.h:820
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:318
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:874
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1259
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1481
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1951
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1539
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1401
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1474
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1407
unsigned arg_size() const
Definition: InstrTypes.h:1424
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:900
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:865
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:677
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:692
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:700
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:674
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:338
arg_iterator arg_begin()
Definition: Function.h:810
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:341
size_t arg_size() const
Definition: Function.h:843
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:205
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:213
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:666
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:556
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:174
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:630
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2499
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2010
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
const BasicBlock * getParent() const
Definition: Instruction.h:139
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:177
bool isUnordered() const
Definition: Instructions.h:258
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Context object for machine code objects.
Definition: MCContext.h:76
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:389
Metadata node.
Definition: Metadata.h:1059
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:275
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:287
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:255
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool useSoftFloat() const
Definition: PPCSubtarget.h:174
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:202
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:249
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:267
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:206
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:273
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:285
bool is64BitELFABI() const
Definition: PPCSubtarget.h:218
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:291
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:261
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:720
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:730
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:826
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:725
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
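A minimal sketch of getConstant; note that getTargetConstant (listed further down) builds the variant that instruction selection treats as a final, unfoldable operand:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative only: materialize X + 42 as an ISD::ADD over a constant.
static SDValue addFortyTwo(SelectionDAG &DAG, const SDLoc &dl, SDValue X) {
  EVT VT = X.getValueType();
  SDValue C = DAG.getConstant(42, dl, VT);
  return DAG.getNode(ISD::ADD, dl, VT, X, C);
}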
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
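A minimal sketch pairing getStore with an explicit alignment, matching the signature above; the value and pointer are assumed to be supplied by the caller:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative only: store Val through Ptr with 4-byte alignment and
// return the new chain.
static SDValue storeWord(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
                         SDValue Val, SDValue Ptr) {
  return DAG.getStore(Chain, dl, Val, Ptr, MachinePointerInfo(), Align(4));
}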
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:771
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:674
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:766
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:797
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:843
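A minimal sketch of getSplatBuildVector; the element value is illustrative:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative only: build <4 x i32> <1, 1, 1, 1> from a scalar constant.
static SDValue splatOnes(SelectionDAG &DAG, const SDLoc &dl) {
  SDValue One = DAG.getConstant(1, dl, MVT::i32);
  return DAG.getSplatBuildVector(MVT::v4i32, dl, One);
}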
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
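computeKnownBits and MaskedValueIsZero are the usual way to prove alignment or sign facts about a DAG value. A minimal sketch, assuming Addr has a fixed-width integer type:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative only: return true if the low two bits of Addr are provably
// zero, i.e. the address is at least 4-byte aligned.
static bool isAddrWordAligned(SelectionDAG &DAG, SDValue Addr) {
  unsigned BW = Addr.getValueSizeInBits().getFixedValue();
  APInt LowTwo = APInt::getLowBitsSet(BW, 2);
  return DAG.MaskedValueIsZero(Addr, LowTwo);
}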
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:737
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
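A minimal sketch of getVectorShuffle; the mask below reverses the sixteen bytes of a v16i8 value, with the second input left undef:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative only: byte-reverse V via a VECTOR_SHUFFLE node.
static SDValue reverseBytes(SelectionDAG &DAG, const SDLoc &dl, SDValue V) {
  SmallVector<int, 16> Mask;
  for (int i = 15; i >= 0; --i)
    Mask.push_back(i);
  return DAG.getVectorShuffle(MVT::v16i8, dl, V, DAG.getUNDEF(MVT::v16i8),
                              Mask);
}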
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
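The pair-returning insert makes SmallSet convenient for first-visit checks. A minimal sketch:

#include "llvm/ADT/SmallSet.h"
using namespace llvm;

// Illustrative only: count distinct values, staying allocation-free while
// the set holds eight or fewer elements.
static unsigned countDistinct(const unsigned *Vals, unsigned N) {
  SmallSet<unsigned, 8> Seen;
  unsigned Distinct = 0;
  for (unsigned i = 0; i != N; ++i)
    if (Seen.insert(Vals[i]).second) // .second is true on first insertion
      ++Distinct;
  return Distinct;
}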
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
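A minimal sketch of the Case/Default chain, in the style of inline-asm constraint classification; the mapping here is illustrative, not PPC's actual table:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

// Illustrative only: map a constraint letter to a register-class tag.
static int classifyConstraint(StringRef C) {
  return StringSwitch<int>(C)
      .Case("r", 0)  // general-purpose register
      .Case("f", 1)  // floating-point register
      .Case("v", 2)  // vector register
      .Default(-1);  // unknown constraint
}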
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
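These configuration hooks (addRegisterClass, setOperationAction, setTruncStoreAction, setLoadExtAction, and computeRegisterProperties, all listed above) are normally called together from a target's TargetLowering constructor. A minimal sketch of that pattern; MyTargetLowering, MySubtarget, and the GPRCRegClass name are placeholders, not PPC's real tables:

// Illustrative only: the usual shape of a *TargetLowering constructor body.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MySubtarget &Subtarget)
    : TargetLowering(TM) {
  // i32 values live in the (hypothetical) general-purpose register class.
  addRegisterClass(MVT::i32, &MyTarget::GPRCRegClass);
  // No native divrem instruction: expand to separate div and rem.
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  // No f64->f32 truncating store: legalize via FP_ROUND plus a plain store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // Sign-extending i1 loads are promoted to a wider load plus masking.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
  // Must run after all register classes have been registered.
  computeRegisterProperties(Subtarget.getRegisterInfo());
}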
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:332
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:246
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
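The Type predicates above are typically combined when classifying argument types during call lowering. A minimal sketch:

#include "llvm/IR/Type.h"
using namespace llvm;

// Illustrative only: true for f32/f64 scalars and for vectors of them.
static bool isFPLike(Type *Ty) {
  // getScalarType() returns the element type for vectors, Ty otherwise.
  Type *Scalar = Ty->getScalarType();
  return Scalar->isFloatTy() || Scalar->isDoubleTy();
}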
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1124
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1120
@ TargetConstantPool
Definition: ISDOpcodes.h:168
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1153
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1233
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:977
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1029
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1191
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:937
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1045
@ TargetJumpTable
Definition: ISDOpcodes.h:167
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1213
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:986
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:928
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1075
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1054
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1229
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1149
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:976
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1240
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:969
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1039
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1095
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1180
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1200
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1092
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1144
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1068
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1138
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1312
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1197
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1539
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1455
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1506
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1486
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1545
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1444
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:96
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:188
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:191
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:197
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:148
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:194
@ MO_TPREL_HA
Definition: PPC.h:173
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:182
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:172
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:169
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:185
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:154
@ MO_HA
Definition: PPC.h:170
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
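A minimal sketch of how these PPC::is*ShuffleMask predicates are consulted when classifying a VECTOR_SHUFFLE; this is drastically simplified, and real lowering also checks subtarget features and many more patterns:

#include "PPCISelLowering.h" // declares the PPC:: shuffle predicates
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Illustrative only: name the instruction a mask could map to.
static const char *classifyShuffle(ShuffleVectorSDNode *SVN, bool IsLE) {
  unsigned ShiftElts, InsertAtByte;
  bool Swap;
  if (PPC::isSplatShuffleMask(SVN, /*EltSize=*/4))
    return "xxspltw";
  if (PPC::isXXINSERTWMask(SVN, ShiftElts, InsertAtByte, Swap, IsLE))
    return "xxinsertw";
  if (PPC::isXXSLDWIShuffleMask(SVN, ShiftElts, Swap, IsLE))
    return "xxsldwi";
  return "vperm"; // generic fallback via a permute
}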
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:104
@ XTY_ER
External reference.
Definition: XCOFF.h:240
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:237
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
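A minimal sketch of how isIntS16Immediate gates D-form address folding, simplified from the usual address-matching logic:

#include "PPCISelLowering.h" // declares isIntS16Immediate
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Illustrative only: if N is (add X, simm16), report the foldable offset.
static bool matchRegImmAddr(SDNode *N, int16_t &Imm) {
  return N->getOpcode() == ISD::ADD &&
         isIntS16Immediate(N->getOperand(1).getNode(), Imm);
}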
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
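isPowerOf2_64 and countr_zero pair up when strength-reducing multiplies and divides. A minimal sketch:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;

// Illustrative only: if C is a power of two, return the shift amount that
// replaces a multiply by C; otherwise return -1.
static int mulToShiftAmount(uint64_t C) {
  return isPowerOf2_64(C) ? countr_zero(C) : -1; // e.g. C=16 -> 4
}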
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
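A sketch of the usual dbgs() idiom, reusing this file's "ppc-lowering" debug type; the helper name is illustrative only. The output is compiled out in release builds and enabled at runtime with -debug-only=ppc-lowering in asserts builds.

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "ppc-lowering"

static void traceOpcode(unsigned Opcode) {
  LLVM_DEBUG(llvm::dbgs() << "Lowering node with opcode " << Opcode << "\n");
}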
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign-extended 34-bit integer.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
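A standalone example of alignTo, rounding a raw byte count up to an aligned size, e.g. when computing the stack slot size for a by-value argument.

#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  assert(llvm::alignTo(13, llvm::Align(8)) == 16); // round up to next multiple of 8
  assert(llvm::alignTo(16, llvm::Align(8)) == 16); // already aligned: unchanged
}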
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range [Start, End).
Definition: STLExtras.h:1923
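A small usage example of llvm::count; counting the undef (-1) entries in a shuffle mask is a representative use, not one taken from this file.

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Mask = {-1, 0, -1, 3};
  assert(llvm::count(Mask, -1) == 2); // two undef lanes
}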
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
template <unsigned B> constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:436
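A standalone example of SignExtend32, the operation behind 16-bit immediate checks such as isIntS16Immediate above; SignExtend64 below is the 64-bit analogue.

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  // The low 16 bits 0x8000 read as -32768 once sign-extended.
  assert(llvm::SignExtend32<16>(0x8000) == -32768);
  assert(llvm::SignExtend32<16>(0x7FFF) == 32767);
}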
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
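An example of commonAlignment for a base-plus-offset access: the result is only as large as both the base alignment and the offset allow.

#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  // 16-byte-aligned base plus offset 4 guarantees only 4-byte alignment;
  // offset 32 preserves the full base alignment.
  assert(llvm::commonAlignment(llvm::Align(16), 4) == llvm::Align(4));
  assert(llvm::commonAlignment(llvm::Align(16), 32) == llvm::Align(16));
}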
template <unsigned B> constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load, load) and recursively build the combined load value.
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:252
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
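A standalone example using the APFloat pieces above: convert a double to IEEE single under round-to-nearest-even and check whether any precision was lost, the same kind of exactness test behind checkConvertToNonDenormSingle. PPCDoubleDouble() and rmTowardZero select the other semantics/rounding listed.

#include "llvm/ADT/APFloat.h"
#include <cassert>

int main() {
  llvm::APFloat F(1.5); // exactly representable as an IEEE single
  bool LosesInfo = true;
  (void)F.convert(llvm::APFloat::IEEEsingle(),
                  llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
  assert(!LosesInfo);
}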
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:373
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:267
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:139
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:363
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:299
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:359
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:160
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:306
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:311
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:134
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:319
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Return a VT for a vector type with the same element type but half the number of elements.
Definition: ValueTypes.h:431
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing (from the perspective of the caller) return value virtual register.
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
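A standalone example of the KnownBits queries above: once every bit is known to be zero or one, isConstant() becomes true and getConstant() yields the value. The bit patterns are illustrative only.

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

int main() {
  llvm::KnownBits Known(8);
  Known.Zero = llvm::APInt(8, 0xF0); // top nibble known to be 0
  Known.One = llvm::APInt(8, 0x05);  // bits 0 and 2 known to be 1
  assert(!Known.isConstant());        // bits 1 and 3 still unknown
  Known.Zero |= llvm::APInt(8, 0x0A); // learn that bits 1 and 3 are 0
  assert(Known.isConstant() && Known.getConstant() == 5);
  Known.resetAll(); // every bit unknown again
}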
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR-level pointer.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call lowering.
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null, there is no BaseGV; if BaseOffs is zero, there is no base offset; if HasBaseReg is false, there is no base register; if Scale is zero, there is no ScaleReg.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
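Finally, a hedged sketch of the fluent-builder pattern the CallLoweringInfo entries above describe, as target code might emit a runtime library call. Everything in scope here (DAG, dl, Chain, Callee, RetTy, Args, TLI) is assumed, and setChain belongs to the same builder even though this page does not list it.

// Fragment, not a complete function: configure the call, then lower it.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setSExtResult();
// LowerCallTo returns {call result, output chain}.
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);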